feat!: introduce parser introspection via the repo's Semantic Version baked in

refactor!: expose the allocator family of functions for consumption in scanners
2024-02-12 02:27:17 -05:00 · 2024-02-12 01:02:41 -05:00
583 changed files with 29263 additions and 58393 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@ -1,2 +0,0 @@
-[alias]
-xtask = "run --package xtask --"
--- a/.dockerignore
+++ b/.dockerignore
@ -1,2 +0,0 @@
-target
-.git
--- a/.editorconfig
+++ b/.editorconfig
@ -10,9 +10,6 @@ insert_final_newline = true
 [*.rs]
 indent_size = 4

-[*.{zig,zon}]
-indent_size = 4
-
 [Makefile]
 indent_style = tab
 indent_size = 8
--- a/.envrc
+++ b/.envrc
@ -1 +0,0 @@
-use flake
--- a/.gitattributes
+++ b/.gitattributes
@ -3,4 +3,5 @@
 /lib/src/unicode/*.h linguist-vendored
 /lib/src/unicode/LICENSE linguist-vendored

+/cli/src/generate/prepare_grammar/*.json -diff
 Cargo.lock -diff
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@ -1,15 +0,0 @@
-# These are supported funding model platforms
-
-github: tree-sitter
-patreon: # Replace with a single Patreon username
-open_collective: tree-sitter # Replace with a single Open Collective username
-ko_fi: amaanq
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-polar: # Replace with a single Polar username
-buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
-thanks_dev: # Replace with a single thanks.dev username
-custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@ -1,6 +1,6 @@
 name: Bug Report
 description: Report a problem
-type: Bug
+labels: [bug]
 body:
  - type: textarea
    attributes:
@ -13,11 +13,9 @@ body:
    attributes:
      label: "Steps to reproduce"
      placeholder: |
-        ```sh
        git clone --depth=1 https://github.com/tree-sitter/tree-sitter-ruby
        cd tree-sitter-ruby
        tree-sitter generate
-        ```
    validations:
      required: true

--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@ -1,6 +1,6 @@
 name: Feature request
 description: Request an enhancement
-type: Feature
+labels: [enhancement]
 body:
  - type: markdown
    attributes:
--- a/.github/actions/cache/action.yml
+++ b/.github/actions/cache/action.yml
@ -1,25 +0,0 @@
-name: Cache
-
-description: This action caches fixtures
-
-outputs:
-  cache-hit:
-    description: Cache hit
-    value: ${{ steps.cache.outputs.cache-hit }}
-
-runs:
-  using: composite
-  steps:
-    - uses: actions/cache@v4
-      id: cache
-      with:
-        path: |
-          test/fixtures/grammars
-          target/release/tree-sitter-*.wasm
-        key: fixtures-${{ join(matrix.*, '_') }}-${{ hashFiles(
-          'crates/generate/src/**',
-          'lib/src/parser.h',
-          'lib/src/array.h',
-          'lib/src/alloc.h',
-          'test/fixtures/grammars/*/**/src/*.c',
-          '.github/actions/cache/action.yml') }}
--- a/.github/cliff.toml
+++ b/.github/cliff.toml
@ -1,72 +0,0 @@
-[changelog]
-# changelog header
-header = """
-# Changelog\n
-"""
-# template for the changelog body
-# https://tera.netlify.app/docs/#introduction
-body = """
-{% if version %}\
-    ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
-{% else %}\
-    ## [unreleased]
-{% endif %}\
-{% for group, commits in commits | group_by(attribute="group") %}
-    ### {{ group | striptags | upper_first }}
-    {% for commit in commits%}\
-        {% if not commit.scope %}\
-            - {{ commit.message | upper_first }}\
-              {% if commit.remote.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.remote.pr_number }}>){%- endif %}
-        {% endif %}\
-    {% endfor %}\
-    {% for group, commits in commits | group_by(attribute="scope") %}\
-        {% for commit in commits %}\
-            - **{{commit.scope}}**: {{ commit.message | upper_first }}\
-                {% if commit.remote.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.remote.pr_number }}>){%- endif %}
-        {% endfor %}\
-    {% endfor %}
-{% endfor %}
-"""
-# remove the leading and trailing whitespace from the template
-trim = true
-
-[git]
-# parse the commits based on https://www.conventionalcommits.org
-conventional_commits = true
-# filter out the commits that are not conventional
-filter_unconventional = false
-# process each line of a commit as an individual commit
-split_commits = false
-# regex for preprocessing the commit messages
-commit_preprocessors = [
-  #    { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/neovim/neovim/issues/${2}))"},
-]
-# regex for parsing and grouping commits
-commit_parsers = [
-  { message = "!:", group = "<!-- 0 -->Breaking" },
-  { message = "^feat", group = "<!-- 1 -->Features" },
-  { message = "^fix", group = "<!-- 2 -->Bug Fixes" },
-  { message = "^perf", group = "<!-- 3 -->Performance" },
-  { message = "^doc", group = "<!-- 4 -->Documentation" },
-  { message = "^refactor", group = "<!-- 5 -->Refactor" },
-  { message = "^test", group = "<!-- 6 -->Testing" },
-  { message = "^build", group = "<!-- 7 -->Build System and CI" },
-  { message = "^ci", group = "<!-- 7 -->Build System and CI" },
-  { message = ".*", group = "<!-- 8 -->Other" },
-]
-# filter out the commits that are not matched by commit parsers
-filter_commits = false
-# glob pattern for matching git tags
-tag_pattern = "v[0-9]*"
-# regex for skipping tags
-skip_tags = "v0.1.0-beta.1"
-# regex for ignoring tags
-ignore_tags = ""
-# sort the tags chronologically
-date_order = false
-# sort the commits inside sections by oldest/newest order
-sort_commits = "oldest"
-
-[remote.github]
-owner = "tree-sitter"
-repo = "tree-sitter"
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@ -4,50 +4,15 @@ updates:
    directory: "/"
    schedule:
      interval: "weekly"
-    cooldown:
-      default-days: 3
    commit-message:
      prefix: "build(deps)"
-    labels:
-      - "dependencies"
-      - "cargo"
-    groups:
-      cargo:
-        patterns: ["*"]
    ignore:
-      - dependency-name: "*"
-        update-types: ["version-update:semver-major", "version-update:semver-minor"]
+    - dependency-name: "*"
+      update-types: ["version-update:semver-patch"]

  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
-    cooldown:
-      default-days: 3
    commit-message:
      prefix: "ci"
-    labels:
-      - "dependencies"
-      - "github-actions"
-    groups:
-      actions:
-        patterns: ["*"]
-
-  - package-ecosystem: "npm"
-    versioning-strategy: increase
-    directories:
-      - "/crates/npm"
-      - "/crates/eslint"
-      - "/lib/binding_web"
-    schedule:
-      interval: "weekly"
-    cooldown:
-      default-days: 3
-    commit-message:
-      prefix: "build(deps)"
-    labels:
-      - "dependencies"
-      - "npm"
-    groups:
-      npm:
-        patterns: ["*"]
--- a/.github/scripts/close_spam.js
+++ b/.github/scripts/close_spam.js
@ -1,29 +0,0 @@
-module.exports = async ({ github, context }) => {
-  let target = context.payload.issue;
-  if (target) {
-    await github.rest.issues.update({
-      ...context.repo,
-      issue_number: target.number,
-      state: "closed",
-      state_reason: "not_planned",
-      title: "[spam]",
-      body: "",
-      type: null,
-    });
-  } else {
-    target = context.payload.pull_request;
-    await github.rest.pulls.update({
-      ...context.repo,
-      pull_number: target.number,
-      state: "closed",
-      title: "[spam]",
-      body: "",
-    });
-  }
-
-  await github.rest.issues.lock({
-    ...context.repo,
-    issue_number: target.number,
-    lock_reason: "spam",
-  });
-};
--- a/.github/scripts/cross.sh
+++ b/.github/scripts/cross.sh
@ -0,0 +1,17 @@
+#!/bin/bash
+# Helper for cross builds: runs the given command line inside the $CROSS_IMAGE docker image.
+# set -x
+set -e
+# Refuse to run unless the caller explicitly selected the cross build mode.
+if [ "$BUILD_CMD" != "cross" ]; then
+    echo "cross.sh - is a helper to assist only in cross compiling environments" >&2
+    echo "To use this tool set the BUILD_CMD env var to the \"cross\" value" >&2
+    exit 111
+fi
+
+if [ -z "$CROSS_IMAGE" ]; then
+    echo "The CROSS_IMAGE env var should be provided" >&2
+    exit 111
+fi
+# Mount /home/runner at the same path and use the current directory as the container's working directory.
+docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@"
--- a/.github/scripts/make.sh
+++ b/.github/scripts/make.sh
@ -0,0 +1,19 @@
+#!/bin/bash
+# Runs make with the given arguments: natively, or through cross.sh when BUILD_CMD=cross.
+# set -x
+set -e
+# A cross build needs an explicit toolchain, so both CC and AR must be set.
+if [ "$BUILD_CMD" == "cross" ]; then
+    if [ -z "$CC" ]; then
+        echo "make.sh: CC is not set" >&2
+        exit 111
+    fi
+    if [ -z "$AR" ]; then
+        echo "make.sh: AR is not set" >&2
+        exit 111
+    fi
+    # Quote the assignments so a value containing spaces reaches make as a single argument.
+    cross.sh make CC="$CC" AR="$AR" "$@"
+else
+    make "$@"
+fi
--- a/.github/scripts/reviewers_remove.js
+++ b/.github/scripts/reviewers_remove.js
@ -1,16 +0,0 @@
-module.exports = async ({ github, context }) => {
-  const requestedReviewers = await github.rest.pulls.listRequestedReviewers({
-    owner: context.repo.owner,
-    repo: context.repo.repo,
-    pull_number: context.issue.number,
-  });
-
-  const reviewers = requestedReviewers.data.users.map((e) => e.login);
-
-  github.rest.pulls.removeRequestedReviewers({
-    owner: context.repo.owner,
-    repo: context.repo.repo,
-    pull_number: context.issue.number,
-    reviewers: reviewers,
-  });
-};
--- a/.github/scripts/tree-sitter.sh
+++ b/.github/scripts/tree-sitter.sh
@ -0,0 +1,28 @@
+#!/bin/bash
+# Runs the release tree-sitter binary built for $TARGET, through cross.sh when BUILD_CMD=cross.
+# set -x
+set -e
+# ROOT and TARGET are both required: together they locate the binary below.
+if [ -z "$ROOT" ]; then
+    echo "The ROOT env var should be set to absolute path of a repo root folder" >&2
+    exit 111
+fi
+
+if [ -z "$TARGET" ]; then
+    echo "The TARGET env var should be equal to a \`cargo build --target <TARGET>\` command value" >&2
+    exit 111
+fi
+# Path of the CLI binary inside the target-specific release directory.
+tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter
+
+if [ "$BUILD_CMD" == "cross" ]; then
+    if [ -z "$CROSS_RUNNER" ]; then
+        echo "The CROSS_RUNNER env var should be set to a CARGO_TARGET_*_RUNNER env var value" >&2
+        echo "that is available in a docker image used by the cross tool under the hood" >&2
+        exit 111
+    fi
+    # NOTE(review): $CROSS_RUNNER is unquoted, presumably so a runner value with arguments splits into words - confirm.
+    cross.sh $CROSS_RUNNER "$tree_sitter" "$@"
+else
+    "$tree_sitter" "$@"
+fi
--- a/.github/scripts/wasm_stdlib.js
+++ b/.github/scripts/wasm_stdlib.js
@ -1,25 +0,0 @@
-module.exports = async ({ github, context, core }) => {
-  if (context.eventName !== 'pull_request') return;
-
-  const prNumber = context.payload.pull_request.number;
-  const owner = context.repo.owner;
-  const repo = context.repo.repo;
-
-  const { data: files } = await github.rest.pulls.listFiles({
-    owner,
-    repo,
-    pull_number: prNumber
-  });
-
-  const changedFiles = files.map(file => file.filename);
-
-  const wasmStdLibSrc = 'crates/language/wasm/';
-  const dirChanged = changedFiles.some(file => file.startsWith(wasmStdLibSrc));
-
-  if (!dirChanged) return;
-
-  const wasmStdLibHeader = 'lib/src/wasm/wasm-stdlib.h';
-  const requiredChanged = changedFiles.includes(wasmStdLibHeader);
-
-  if (!requiredChanged) core.setFailed(`Changes detected in ${wasmStdLibSrc} but ${wasmStdLibHeader} was not modified.`);
-};
--- a/.github/workflows/backport.yml
+++ b/.github/workflows/backport.yml
@ -1,31 +0,0 @@
-name: Backport Pull Request
-
-on:
-  pull_request_target:
-    types: [closed, labeled]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-jobs:
-  backport:
-    if: github.event.pull_request.merged
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Create app token
-        uses: actions/create-github-app-token@v2
-        id: app-token
-        with:
-          app-id: ${{ vars.BACKPORT_APP }}
-          private-key: ${{ secrets.BACKPORT_KEY }}
-
-      - name: Create backport PR
-        uses: korthout/backport-action@v4
-        with:
-          pull_title: "${pull_title}"
-          label_pattern: "^ci:backport ([^ ]+)$"
-          github_token: ${{ steps.app-token.outputs.token }}
--- a/.github/workflows/bindgen.yml
+++ b/.github/workflows/bindgen.yml
@ -1,30 +0,0 @@
-name: Check Bindgen Output
-
-on:
-  pull_request:
-    paths:
-      - lib/include/tree_sitter/api.h
-      - lib/binding_rust/bindings.rs
-  push:
-    branches: [master]
-    paths:
-      - lib/include/tree_sitter/api.h
-      - lib/binding_rust/bindings.rs
-
-jobs:
-  check-bindgen:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Set up stable Rust toolchain
-        uses: actions-rust-lang/setup-rust-toolchain@v1
-        with:
-          toolchain: stable
-
-      - name: Generate bindings
-        run: cargo xtask generate-bindings
-
-      - name: Check if the bindgen output changed
-        run: git diff --exit-code lib/binding_rust/bindings.rs
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -1,9 +1,14 @@
 name: Build & Test

+env:
+  CARGO_TERM_COLOR: always
+  RUSTFLAGS: "-D warnings"
+  CROSS_DEBUG: 1
+
 on:
  workflow_call:
    inputs:
-      run-test:
+      run_test:
        default: true
        type: boolean

@ -16,296 +21,180 @@ jobs:
      fail-fast: false
      matrix:
        platform:
-          - linux-arm64
-          - linux-arm
-          - linux-x64
-          - linux-x86
-          - linux-powerpc64
-          - windows-arm64
-          - windows-x64
-          - windows-x86
-          - macos-arm64
-          - macos-x64
-          - wasm32
+        - linux-arm64       #
+        - linux-arm         #
+        - linux-x64         #
+        - linux-x86         #
+        - linux-powerpc64   #
+        - windows-arm64     #
+        - windows-x64       # <-- No C library build - requires an additional adapted Makefile for `cl.exe` compiler
+        - windows-x86       #     -- // --
+        - macos-arm64       #
+        - macos-x64         #

        include:
-          # When adding a new `target`:
-          # 1. Define a new platform alias above
-          # 2. Add a new record to the matrix map in `crates/cli/npm/install.js`
-          - { platform: linux-arm64     , target: aarch64-unknown-linux-gnu     , os: ubuntu-24.04-arm }
-          - { platform: linux-arm       , target: armv7-unknown-linux-gnueabihf , os: ubuntu-24.04-arm }
-          - { platform: linux-x64       , target: x86_64-unknown-linux-gnu      , os: ubuntu-24.04     }
-          - { platform: linux-x86       , target: i686-unknown-linux-gnu        , os: ubuntu-24.04     }
-          - { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu   , os: ubuntu-24.04     }
-          - { platform: windows-arm64   , target: aarch64-pc-windows-msvc       , os: windows-11-arm   }
-          - { platform: windows-x64     , target: x86_64-pc-windows-msvc        , os: windows-2025     }
-          - { platform: windows-x86     , target: i686-pc-windows-msvc          , os: windows-2025     }
-          - { platform: macos-arm64     , target: aarch64-apple-darwin          , os: macos-15         }
-          - { platform: macos-x64       , target: x86_64-apple-darwin           , os: macos-15-intel   }
-          - { platform: wasm32          , target: wasm32-unknown-unknown        , os: ubuntu-24.04     }
+        # When adding a new `target`:
+        # 1. Define a new platform alias above
+        # 2. Add a new record to a matrix map in `cli/npm/install.js`
+        - { platform: linux-arm64       , target: aarch64-unknown-linux-gnu           , os: ubuntu-latest  , use-cross: true   }
+        - { platform: linux-arm         , target: arm-unknown-linux-gnueabi           , os: ubuntu-latest  , use-cross: true   }
+        - { platform: linux-x64         , target: x86_64-unknown-linux-gnu            , os: ubuntu-20.04   , enable-wasm: true } #2272
+        - { platform: linux-x86         , target: i686-unknown-linux-gnu              , os: ubuntu-latest  , use-cross: true   }
+        - { platform: linux-powerpc64   , target: powerpc64-unknown-linux-gnu         , os: ubuntu-latest  , use-cross: true   }
+        - { platform: windows-arm64     , target: aarch64-pc-windows-msvc             , os: windows-latest                     }
+        - { platform: windows-x64       , target: x86_64-pc-windows-msvc              , os: windows-latest , enable-wasm: true }
+        - { platform: windows-x86       , target: i686-pc-windows-msvc                , os: windows-latest                     }
+        - { platform: macos-arm64       , target: aarch64-apple-darwin                , os: macos-14       , enable-wasm: true }
+        - { platform: macos-x64         , target: x86_64-apple-darwin                 , os: macos-latest   , enable-wasm: true }

-          # Extra features
-          - { platform: linux-arm64     , features: wasm }
-          - { platform: linux-x64       , features: wasm }
-          - { platform: macos-arm64     , features: wasm }
-          - { platform: macos-x64       , features: wasm }
+        # Cross compilers for C library
+        - { platform: linux-arm64       , cc: aarch64-linux-gnu-gcc             , ar: aarch64-linux-gnu-ar           }
+        - { platform: linux-arm         , cc: arm-linux-gnueabi-gcc             , ar: arm-linux-gnueabi-ar           }
+        - { platform: linux-x86         , cc: i686-linux-gnu-gcc                , ar: i686-linux-gnu-ar              }
+        - { platform: linux-powerpc64   , cc: powerpc64-linux-gnu-gcc           , ar: powerpc64-linux-gnu-ar         }

-          # Cross-compilation
-          - { platform: linux-arm       , cross: true }
-          - { platform: linux-x86       , cross: true }
-          - { platform: linux-powerpc64 , cross: true }
+        # See #2041 tree-sitter issue
+        - { platform: windows-x64   , rust-test-threads: 1 }
+        - { platform: windows-x86   , rust-test-threads: 1 }

-          # Compile-only
-          - { platform: wasm32          , no-run: true }
+        # CLI only build
+        - { platform: windows-arm64 , cli-only: true }

    env:
-      CARGO_TERM_COLOR: always
-      RUSTFLAGS: -D warnings
+      BUILD_CMD: cargo
+      EMSCRIPTEN_VERSION: ""
+      EXE: ${{ contains(matrix.target, 'windows') && '.exe' || '' }}

    defaults:
      run:
        shell: bash

    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v6
+    - uses: actions/checkout@v4

-    - name: Set up cross-compilation
-      if: matrix.cross
+    - name: Read Emscripten version
      run: |
-        for target in armv7-unknown-linux-gnueabihf i686-unknown-linux-gnu powerpc64-unknown-linux-gnu; do
-          camel_target=${target//-/_}; target_cc=${target/-unknown/}
-          printf 'CC_%s=%s\n' "$camel_target" "${target_cc/v7/}-gcc"
-          printf 'AR_%s=%s\n' "$camel_target" "${target_cc/v7/}-ar"
-          printf 'CARGO_TARGET_%s_LINKER=%s\n' "${camel_target^^}" "${target_cc/v7/}-gcc"
-        done >> $GITHUB_ENV
-        {
-          printf 'CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm -L /usr/arm-linux-gnueabihf\n'
-          printf 'CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64 -L /usr/powerpc64-linux-gnu\n'
-        } >> $GITHUB_ENV
-
-    - name: Get emscripten version
-      if: contains(matrix.features, 'wasm')
-      run: printf 'EMSCRIPTEN_VERSION=%s\n' "$(<crates/loader/emscripten-version)" >> $GITHUB_ENV
+        echo "EMSCRIPTEN_VERSION=$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV

    - name: Install Emscripten
-      if: contains(matrix.features, 'wasm')
+      if: ${{ !matrix.cli-only && !matrix.use-cross }}
      uses: mymindstorm/setup-emsdk@v14
      with:
        version: ${{ env.EMSCRIPTEN_VERSION }}

-    - name: Set up Rust
-      uses: actions-rust-lang/setup-rust-toolchain@v1
+    - run: rustup toolchain install stable --profile minimal
+    - run: rustup target add ${{ matrix.target }}
+    - uses: Swatinem/rust-cache@v2
+
+    - name: Install cross
+      if: ${{ matrix.use-cross }}
+      uses: taiki-e/install-action@v2
      with:
-        target: ${{ matrix.target }}
+        tool: cross

-    - name: Install cross-compilation toolchain
-      if: matrix.cross
+    - name: Build custom cross image
+      if: ${{ matrix.use-cross && matrix.os == 'ubuntu-latest' }}
      run: |
-        sudo apt-get update -qy
-        if [[ $PLATFORM == linux-arm ]]; then
-          sudo apt-get install -qy {binutils,gcc}-arm-linux-gnueabihf qemu-user
-        elif [[ $PLATFORM == linux-x86 ]]; then
-          sudo apt-get install -qy {binutils,gcc}-i686-linux-gnu
-        elif [[ $PLATFORM == linux-powerpc64 ]]; then
-          sudo apt-get install -qy {binutils,gcc}-powerpc64-linux-gnu qemu-user
+        cd ..
+
+        target="${{ matrix.target }}"
+        image=ghcr.io/cross-rs/$target:custom
+        echo "CROSS_IMAGE=$image"                              >> $GITHUB_ENV
+
+        echo "[target.$target]"                                >> Cross.toml
+        echo "image = \"$image\""                              >> Cross.toml
+        echo "CROSS_CONFIG=$PWD/Cross.toml"                    >> $GITHUB_ENV
+
+        echo "FROM ghcr.io/cross-rs/$target:edge"              >> Dockerfile
+        echo "ENV DEBIAN_FRONTEND=noninteractive"              >> Dockerfile
+        echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile
+        docker build -t $image .
+
+    - name: Setup env extras
+      env:
+        RUST_TEST_THREADS: ${{ matrix.rust-test-threads || '' }}
+        USE_CROSS: ${{ matrix.use-cross }}
+        TARGET: ${{ matrix.target }}
+        CC: ${{ matrix.cc }}
+        AR: ${{ matrix.ar }}
+        IS_WINDOWS: ${{ contains(matrix.os, 'windows') }}
+        ENABLE_WASM: ${{ matrix.enable-wasm }}
+      run: |
+        PATH="$PWD/.github/scripts:$PATH"
+        echo "$PWD/.github/scripts" >> $GITHUB_PATH
+
+        echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV
+        echo "TARGET=$TARGET" >> $GITHUB_ENV
+        echo "ROOT=$PWD" >> $GITHUB_ENV
+
+        [ -n "$RUST_TEST_THREADS" ] && \
+        echo "RUST_TEST_THREADS=$RUST_TEST_THREADS" >> $GITHUB_ENV
+
+        [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV
+        [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV
+
+        [ "$IS_WINDOWS" = "false" ] && echo "CFLAGS=-Werror" >> $GITHUB_ENV
+
+        if [ "$ENABLE_WASM" == "true" ]; then
+          echo "CLI_FEATURES=wasm" >> $GITHUB_ENV
        fi
-      env:
-        PLATFORM: ${{ matrix.platform }}

-    - name: Install MinGW and Clang (Windows x64 MSYS2)
-      if: matrix.platform == 'windows-x64'
-      uses: msys2/setup-msys2@v2
-      with:
-        update: true
-        install: |
-          mingw-w64-x86_64-toolchain
-          mingw-w64-x86_64-clang
-          mingw-w64-x86_64-make
-          mingw-w64-x86_64-cmake
-
-    # TODO: Remove RUSTFLAGS="--cap-lints allow" once we use a wasmtime release that addresses
-    # the `mismatched-lifetime-syntaxes` lint
-    - name: Build wasmtime library (Windows x64 MSYS2)
-      if: contains(matrix.features, 'wasm') && matrix.platform == 'windows-x64'
-      run: |
-        mkdir -p target
-        WASMTIME_VERSION=$(cargo metadata --format-version=1 --locked --features wasm | \
-                           jq -r '.packages[] | select(.name == "wasmtime-c-api-impl") | .version')
-        curl -LSs "$WASMTIME_REPO/archive/refs/tags/v${WASMTIME_VERSION}.tar.gz" | tar xzf - -C target
-        cd target/wasmtime-${WASMTIME_VERSION}
-        cmake -S crates/c-api -B target/c-api \
-          -DCMAKE_INSTALL_PREFIX="$PWD/artifacts" \
-          -DWASMTIME_DISABLE_ALL_FEATURES=ON \
-          -DWASMTIME_FEATURE_CRANELIFT=ON \
-          -DWASMTIME_TARGET='x86_64-pc-windows-gnu'
-        cmake --build target/c-api && cmake --install target/c-api
-        printf 'CMAKE_PREFIX_PATH=%s\n' "$PWD/artifacts" >> $GITHUB_ENV
-      env:
-        WASMTIME_REPO: https://github.com/bytecodealliance/wasmtime
-        RUSTFLAGS: ${{ env.RUSTFLAGS }} --cap-lints allow
-
-    - name: Build C library (Windows x64 MSYS2 CMake)
-      if: matrix.platform == 'windows-x64'
-      shell: msys2 {0}
-      run: |
-        cmake -G Ninja -S . -B build/static \
-          -DBUILD_SHARED_LIBS=OFF \
-          -DCMAKE_BUILD_TYPE=Debug \
-          -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-          -DTREE_SITTER_FEATURE_WASM=$WASM \
-          -DCMAKE_C_COMPILER=clang
-        cmake --build build/static
-
-        cmake -G Ninja -S . -B build/shared \
-          -DBUILD_SHARED_LIBS=ON \
-          -DCMAKE_BUILD_TYPE=Debug \
-          -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-          -DTREE_SITTER_FEATURE_WASM=$WASM \
-          -DCMAKE_C_COMPILER=clang
-        cmake --build build/shared
-        rm -rf \
-          build/{static,shared} \
-          "${CMAKE_PREFIX_PATH}/artifacts" \
-          target/wasmtime-${WASMTIME_VERSION}
-      env:
-        WASM: ${{ contains(matrix.features, 'wasm') && 'ON' || 'OFF' }}
-
-    # TODO: Remove RUSTFLAGS="--cap-lints allow" once we use a wasmtime release that addresses
-    # the `mismatched-lifetime-syntaxes` lint
-    - name: Build wasmtime library
-      if: contains(matrix.features, 'wasm')
-      run: |
-        mkdir -p target
-        WASMTIME_VERSION=$(cargo metadata --format-version=1 --locked --features wasm | \
-                           jq -r '.packages[] | select(.name == "wasmtime-c-api-impl") | .version')
-        curl -LSs "$WASMTIME_REPO/archive/refs/tags/v${WASMTIME_VERSION}.tar.gz" | tar xzf - -C target
-        cd target/wasmtime-${WASMTIME_VERSION}
-        cmake -S crates/c-api -B target/c-api \
-          -DCMAKE_INSTALL_PREFIX="$PWD/artifacts" \
-          -DWASMTIME_DISABLE_ALL_FEATURES=ON \
-          -DWASMTIME_FEATURE_CRANELIFT=ON \
-          -DWASMTIME_TARGET='${{ matrix.target }}'
-        cmake --build target/c-api && cmake --install target/c-api
-        printf 'CMAKE_PREFIX_PATH=%s\n' "$PWD/artifacts" >> $GITHUB_ENV
-      env:
-        WASMTIME_REPO: https://github.com/bytecodealliance/wasmtime
-        RUSTFLAGS: ${{ env.RUSTFLAGS }} --cap-lints allow
-
-    - name: Build C library (make)
-      if: runner.os != 'Windows'
-      run: |
-        if [[ $PLATFORM == linux-arm ]]; then
-          CC=arm-linux-gnueabihf-gcc; AR=arm-linux-gnueabihf-ar
-        elif [[ $PLATFORM == linux-x86 ]]; then
-          CC=i686-linux-gnu-gcc; AR=i686-linux-gnu-ar
-        elif [[ $PLATFORM == linux-powerpc64 ]]; then
-          CC=powerpc64-linux-gnu-gcc; AR=powerpc64-linux-gnu-ar
-        else
-          CC=gcc; AR=ar
+        if [ "$USE_CROSS" == "true" ]; then
+          echo "BUILD_CMD=cross" >> $GITHUB_ENV
+          runner=$(BUILD_CMD=cross cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'")
+          [ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV
        fi
-        make -j CFLAGS="$CFLAGS" CC=$CC AR=$AR
-      env:
-        PLATFORM: ${{ matrix.platform }}
-        CFLAGS: -g -Werror -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types

-    - name: Build C library (CMake)
-      if: "!matrix.cross"
-      run: |
-        cmake -S . -B build/static \
-          -DBUILD_SHARED_LIBS=OFF \
-          -DCMAKE_BUILD_TYPE=Debug \
-          -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-          -DTREE_SITTER_FEATURE_WASM=$WASM
-        cmake --build build/static --verbose
+    - name: Build C library
+      if: ${{ !contains(matrix.os, 'windows') }} # Requires an additional adapted Makefile for `cl.exe` compiler
+      run: make.sh -j

-        cmake -S . -B build/shared \
-          -DBUILD_SHARED_LIBS=ON \
-          -DCMAKE_BUILD_TYPE=Debug \
-          -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-          -DTREE_SITTER_FEATURE_WASM=$WASM
-        cmake --build build/shared --verbose
-      env:
-        CC: ${{ contains(matrix.platform, 'linux') && 'clang' || '' }}
-        WASM: ${{ contains(matrix.features, 'wasm') && 'ON' || 'OFF' }}
+    - name: Build wasm library
+      if: ${{ !matrix.cli-only && !matrix.use-cross }} # No sense to build on the same Github runner hosts many times
+      run: script/build-wasm

-    - name: Build Wasm library
-      if: contains(matrix.features, 'wasm')
-      shell: bash
-      run: |
-        cd lib/binding_web
-        npm ci
-        CJS=true npm run build
-        CJS=true npm run build:debug
-        npm run build
-        npm run build:debug
-
-    - name: Check no_std builds
-      if: inputs.run-test && !matrix.no-run
-      working-directory: lib
-      shell: bash
-      run: cargo check --no-default-features --target='${{ matrix.target }}'
-
-    - name: Build target
-      run: cargo build --release --target='${{ matrix.target }}' --features='${{ matrix.features }}' $PACKAGE
-      env:
-        PACKAGE: ${{ matrix.platform == 'wasm32' && '-p tree-sitter' || '' }}
-
-    - name: Cache fixtures
-      id: cache
-      if: inputs.run-test && !matrix.no-run
-      uses: ./.github/actions/cache
+    - name: Build CLI
+      run: $BUILD_CMD build --release --target=${{ matrix.target }} --features=${CLI_FEATURES}

    - name: Fetch fixtures
-      if: inputs.run-test && !matrix.no-run
-      run: cargo run -p xtask --target='${{ matrix.target }}' -- fetch-fixtures
+      if: ${{ !matrix.cli-only && inputs.run_test }} # Don't fetch fixtures for only CLI building targets
+      run: script/fetch-fixtures

    - name: Generate fixtures
-      if: inputs.run-test && !matrix.no-run && steps.cache.outputs.cache-hit != 'true'
-      run: cargo run -p xtask --target='${{ matrix.target }}' -- generate-fixtures
+      if: ${{ !matrix.cli-only && inputs.run_test }} # Can't natively run CLI on Github runner's host
+      run: script/generate-fixtures

-    - name: Generate Wasm fixtures
-      if: inputs.run-test && !matrix.no-run && contains(matrix.features, 'wasm') && steps.cache.outputs.cache-hit != 'true'
-      run: cargo run -p xtask --target='${{ matrix.target }}' -- generate-fixtures --wasm
+    - name: Generate WASM fixtures
+      if: ${{ !matrix.cli-only && !matrix.use-cross && inputs.run_test }} # See comment for the "Build wasm library" step
+      run: script/generate-fixtures-wasm

    - name: Run main tests
-      if: inputs.run-test && !matrix.no-run
-      run: cargo test --target='${{ matrix.target }}' --features='${{ matrix.features }}'
+      if: ${{ !matrix.cli-only && inputs.run_test }} # Can't natively run CLI on Github runner's host
+      run: $BUILD_CMD test --target=${{ matrix.target }} --features=${CLI_FEATURES}

-    - name: Run Wasm tests
-      if: inputs.run-test && !matrix.no-run && contains(matrix.features, 'wasm')
-      run: cargo run -p xtask --target='${{ matrix.target }}' -- test-wasm
+    - name: Run wasm tests
+      if: ${{ !matrix.cli-only && !matrix.use-cross && inputs.run_test }} # See comment for the "Build wasm library" step
+      run: script/test-wasm
+
+    - name: Run benchmarks
+      if: ${{ !matrix.cli-only && !matrix.use-cross && inputs.run_test }} # Cross-compiled benchmarks make no sense
+      run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.target }}

    - name: Upload CLI artifact
-      if: "!matrix.no-run"
-      uses: actions/upload-artifact@v6
+      uses: actions/upload-artifact@v4
      with:
        name: tree-sitter.${{ matrix.platform }}
-        path: target/${{ matrix.target }}/release/tree-sitter${{ contains(matrix.target, 'windows') && '.exe' || '' }}
+        path: target/${{ matrix.target }}/release/tree-sitter${{ env.EXE }}
        if-no-files-found: error
        retention-days: 7

-    - name: Upload Wasm artifacts
-      if: matrix.platform == 'linux-x64'
-      uses: actions/upload-artifact@v6
+    - name: Upload WASM artifacts
+      if: ${{ matrix.platform == 'linux-x64' }}
+      uses: actions/upload-artifact@v4
      with:
        name: tree-sitter.wasm
        path: |
-          lib/binding_web/web-tree-sitter.js
-          lib/binding_web/web-tree-sitter.js.map
-          lib/binding_web/web-tree-sitter.cjs
-          lib/binding_web/web-tree-sitter.cjs.map
-          lib/binding_web/web-tree-sitter.wasm
-          lib/binding_web/web-tree-sitter.wasm.map
-          lib/binding_web/debug/web-tree-sitter.cjs
-          lib/binding_web/debug/web-tree-sitter.cjs.map
-          lib/binding_web/debug/web-tree-sitter.js
-          lib/binding_web/debug/web-tree-sitter.js.map
-          lib/binding_web/debug/web-tree-sitter.wasm
-          lib/binding_web/debug/web-tree-sitter.wasm.map
-          lib/binding_web/lib/*.c
-          lib/binding_web/lib/*.h
-          lib/binding_web/lib/*.ts
-          lib/binding_web/src/*.ts
+          lib/binding_web/tree-sitter.js
+          lib/binding_web/tree-sitter.wasm
        if-no-files-found: error
        retention-days: 7
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@ -0,0 +1,24 @@
+name: Full Rust codebase checks
+
+on:
+  workflow_call:
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - run: rustup toolchain install stable --profile minimal
+    - uses: Swatinem/rust-cache@v2
+
+    - run: make lint
+
+  check_c_warnings:
+    name: Check C warnings
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Make C library to check that it's able to compile without warnings
+      run: make -j CFLAGS="-Werror"
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -2,48 +2,22 @@ name: CI

 on:
  pull_request:
-    paths-ignore:
-      - docs/**
-      - "**/README.md"
-      - CONTRIBUTING.md
-      - LICENSE
-      - cli/src/templates
  push:
-    branches: [master]
-    paths-ignore:
-      - docs/**
-      - "**/README.md"
-      - CONTRIBUTING.md
-      - LICENSE
-      - cli/src/templates
+    branches:
+      - 'master'

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: ${{ github.event_name != 'push' }}
+  cancel-in-progress: true

 jobs:
  checks:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Set up stable Rust toolchain
-        uses: actions-rust-lang/setup-rust-toolchain@v1
-        with:
-          toolchain: stable
-          components: clippy, rustfmt
-
-      - name: Lint files
-        run: |
-          make lint
-          make lint-web
+    uses: ./.github/workflows/checks.yml

  sanitize:
+    needs: checks
    uses: ./.github/workflows/sanitize.yml

  build:
+    needs: checks
    uses: ./.github/workflows/build.yml
-
-  check-wasm-stdlib:
-    uses: ./.github/workflows/wasm_stdlib.yml
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@ -1,50 +0,0 @@
-name: Deploy Docs
-on:
-  push:
-    branches: [master]
-    paths: [docs/**]
-  workflow_dispatch:
-
-jobs:
-  deploy-docs:
-    runs-on: ubuntu-latest
-
-    permissions:
-      contents: write
-      pages: write
-      id-token: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Set up Rust
-        uses: actions-rust-lang/setup-rust-toolchain@v1
-
-      - name: Install mdbook
-        env:
-          GH_TOKEN: ${{ github.token }}
-        run: |
-          jq_expr='.assets[] | select(.name | contains("x86_64-unknown-linux-gnu")) | .browser_download_url'
-          url=$(gh api repos/rust-lang/mdbook/releases/tags/v0.4.52 --jq "$jq_expr")
-          mkdir mdbook
-          curl -sSL "$url" | tar -xz -C mdbook
-          printf '%s/mdbook\n' "$PWD" >> "$GITHUB_PATH"
-
-      - name: Install mdbook-admonish
-        run: cargo install mdbook-admonish
-
-      - name: Build Book
-        run: mdbook build docs
-
-      - name: Setup Pages
-        uses: actions/configure-pages@v5
-
-      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v4
-        with:
-          path: docs/book
-
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v4
--- a/.github/workflows/nvim_ts.yml
+++ b/.github/workflows/nvim_ts.yml
@ -1,69 +0,0 @@
-name: nvim-treesitter parser tests
-
-on:
-  pull_request:
-    paths:
-      - 'crates/cli/**'
-      - 'crates/config/**'
-      - 'crates/generate/**'
-      - 'crates/loader/**'
-      - '.github/workflows/nvim_ts.yml'
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check_compilation:
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
-        type: [generate, build]
-    name: ${{ matrix.os }} - ${{ matrix.type }}
-    runs-on: ${{ matrix.os }}
-    env:
-      NVIM: ${{ matrix.os == 'windows-latest' && 'nvim-win64\\bin\\nvim.exe' || 'nvim' }}
-      NVIM_TS_DIR: nvim-treesitter
-    steps:
-      - uses: actions/checkout@v6
-
-      - uses: actions/checkout@v6
-        with:
-          repository: nvim-treesitter/nvim-treesitter
-          path: ${{ env.NVIM_TS_DIR }}
-          ref: main
-
-      - if: runner.os != 'Windows'
-        run: echo ${{ github.workspace }}/target/release >> $GITHUB_PATH
-
-      - if: runner.os == 'Windows'
-        run: echo ${{ github.workspace }}/target/release >> $env:GITHUB_PATH
-
-      - uses: actions-rust-lang/setup-rust-toolchain@v1
-      - run: cargo build --release
-      - uses: ilammy/msvc-dev-cmd@v1
-
-      - name: Install and prepare Neovim
-        run: bash ./scripts/ci-install.sh
-        working-directory: ${{ env.NVIM_TS_DIR }}
-
-      - if: matrix.type == 'generate'
-        name: Generate and compile parsers
-        run: $NVIM -l ./scripts/install-parsers.lua --generate --max-jobs=2
-        working-directory: ${{ env.NVIM_TS_DIR }}
-        shell: bash
-
-      - if: matrix.type == 'build'
-        name: Compile parsers
-        run: $NVIM -l ./scripts/install-parsers.lua --max-jobs=10
-        working-directory: ${{ env.NVIM_TS_DIR }}
-        shell: bash
-
-      - if: "!cancelled()"
-        name: Check query files
-        run: $NVIM -l ./scripts/check-queries.lua
-        working-directory: ${{ env.NVIM_TS_DIR }}
-        shell: bash
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -1,5 +1,4 @@
 name: Release
-
 on:
  workflow_dispatch:
  push:
@ -10,22 +9,19 @@ jobs:
  build:
    uses: ./.github/workflows/build.yml
    with:
-      run-test: false
+      run_test: false

  release:
-    name: Release on GitHub
+    name: Release
    runs-on: ubuntu-latest
    needs: build
    permissions:
-      id-token: write
-      attestations: write
      contents: write
    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4

      - name: Download build artifacts
-        uses: actions/download-artifact@v7
+        uses: actions/download-artifact@v4
        with:
          path: artifacts

@ -35,13 +31,9 @@ jobs:

      - name: Prepare release artifacts
        run: |
-          mkdir -p target web
-          mv artifacts/tree-sitter.wasm/* web/
-
-          tar -czf target/web-tree-sitter.tar.gz -C web .
-
+          mkdir -p target
+          mv artifacts/tree-sitter.wasm/* target/
          rm -r artifacts/tree-sitter.wasm
-
          for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do
            exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*)
            gzip --stdout --name $exe > target/tree-sitter-$platform.gz
@ -49,81 +41,56 @@ jobs:
          rm -rf artifacts
          ls -l target/

-      - name: Generate attestations
-        uses: actions/attest-build-provenance@v3
-        with:
-          subject-path: |
-            target/tree-sitter-*.gz
-            target/web-tree-sitter.tar.gz
-
      - name: Create release
-        run: |-
-          gh release create $GITHUB_REF_NAME \
-            target/tree-sitter-*.gz \
-            target/web-tree-sitter.tar.gz
-        env:
-          GH_TOKEN: ${{ github.token }}
+        uses: softprops/action-gh-release@v1
+        with:
+          name: ${{ github.ref_name }}
+          tag_name: ${{ github.ref_name }}
+          fail_on_unmatched_files: true
+          files: |
+            target/tree-sitter-*.gz
+            target/tree-sitter.wasm
+            target/tree-sitter.js

  crates_io:
-    name: Publish packages to Crates.io
+    name: Publish CLI to Crates.io
    runs-on: ubuntu-latest
-    environment: crates
-    permissions:
-      id-token: write
-      contents: read
    needs: release
    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4

-      - name: Set up Rust
-        uses: actions-rust-lang/setup-rust-toolchain@v1
+      # actions-rs/toolchain is archived; drive the runner's rustup directly,
+      # the same way checks.yml and sanitize.yml install the toolchain.
+      - name: Setup Rust
+        run: |
+          rustup toolchain install stable --profile minimal
+          rustup default stable

-      - name: Set up registry token
-        id: auth
-        uses: rust-lang/crates-io-auth-action@v1
-
-      - name: Publish crates to Crates.io
+      - name: Publish CLI to Crates.io
        uses: katyo/publish-crates@v2
        with:
-          registry-token: ${{ steps.auth.outputs.token }}
+          registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}

  npm:
-    name: Publish packages to npmjs.com
+    name: Publish lib to npmjs.com
    runs-on: ubuntu-latest
-    environment: npm
-    permissions:
-      id-token: write
-      contents: read
    needs: release
    strategy:
      fail-fast: false
      matrix:
-        directory: [crates/cli/npm, lib/binding_web]
+        directory: ["cli/npm", "lib/binding_web"]
    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4

-      - name: Set up Node
-        uses: actions/setup-node@v6
+      - name: Setup Node
+        uses: actions/setup-node@v4
        with:
-          node-version: 24
-          registry-url: https://registry.npmjs.org
+          node-version: 18
+          registry-url: "https://registry.npmjs.org"

-      - name: Set up Rust
-        uses: actions-rust-lang/setup-rust-toolchain@v1
-
-      - name: Build wasm
-        if: matrix.directory == 'lib/binding_web'
+      - name: Publish lib to npmjs.com
+        env:
+          NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
        run: |
          cd ${{ matrix.directory }}
-          npm ci
-          npm run build
-          npm run build:debug
-          CJS=true npm run build
-          CJS=true npm run build:debug
-          npm run build:dts
-
-      - name: Publish to npmjs.com
-        working-directory: ${{ matrix.directory }}
-        run: npm publish
+          npm publish
--- a/.github/workflows/response.yml
+++ b/.github/workflows/response.yml
@ -1,47 +1,34 @@
-name: No response
-
+name: no_response
 on:
  schedule:
-    - cron: "30 1 * * *" # Run every day at 01:30
+    - cron: '30 1 * * *' # Run every day at 01:30
  workflow_dispatch:
  issue_comment:

-permissions:
-  issues: write
-  pull-requests: write
-
 jobs:
  close:
-    name: Close issues with no response
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
    steps:
-      - name: Checkout script
-        uses: actions/checkout@v6
-        with:
-          sparse-checkout: .github/scripts/close_unresponsive.js
-          sparse-checkout-cone-mode: false
-
-      - name: Run script
-        uses: actions/github-script@v8
+      - uses: actions/checkout@v4
+      - uses: actions/github-script@v7
        with:
          script: |
            const script = require('./.github/scripts/close_unresponsive.js')
            await script({github, context})

  remove_label:
-    name: Remove response label
    if: github.event_name == 'issue_comment'
    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
    steps:
-      - name: Checkout script
-        uses: actions/checkout@v6
-        with:
-          sparse-checkout: .github/scripts/remove_response_label.js
-          sparse-checkout-cone-mode: false
-
-      - name: Run script
-        uses: actions/github-script@v8
+      - uses: actions/checkout@v4
+      - uses: actions/github-script@v7
        with:
          script: |
            const script = require('./.github/scripts/remove_response_label.js')
--- a/.github/workflows/reviewers_remove.yml
+++ b/.github/workflows/reviewers_remove.yml
@ -1,25 +0,0 @@
-name: Remove Reviewers
-
-on:
-  pull_request_target:
-    types: [converted_to_draft, closed]
-
-permissions:
-  pull-requests: write
-
-jobs:
-  remove-reviewers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout script
-        uses: actions/checkout@v6
-        with:
-          sparse-checkout: .github/scripts/reviewers_remove.js
-          sparse-checkout-cone-mode: false
-
-      - name: Run script
-        uses: actions/github-script@v8
-        with:
-          script: |
-            const script = require('./.github/scripts/reviewers_remove.js')
-            await script({github, context})
--- a/.github/workflows/sanitize.yml
+++ b/.github/workflows/sanitize.yml
@ -2,50 +2,49 @@ name: Sanitize

 env:
  CARGO_TERM_COLOR: always
-  RUSTFLAGS: -D warnings
+  RUSTFLAGS: "-D warnings"

 on:
  workflow_call:

 jobs:
-  check-undefined-behaviour:
+  check_undefined_behaviour:
+    name: Sanitizer checks
    runs-on: ubuntu-latest
-    timeout-minutes: 20
    env:
      TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter
    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
+    - name: Checkout source code
+      uses: actions/checkout@v4

-      - name: Install UBSAN library
-        run: sudo apt-get update -y && sudo apt-get install -y libubsan1
+    - name: Install UBSAN library
+      run: sudo apt-get update -y && sudo apt-get install -y libubsan1

-      - name: Set up Rust
-        uses: actions-rust-lang/setup-rust-toolchain@v1
+    - run: rustup toolchain install stable --profile minimal
+    - uses: Swatinem/rust-cache@v2

-      - name: Build project
-        run: cargo build --release
+    - name: Build CLI
+      run: cargo build --release

-      - name: Cache fixtures
-        uses: ./.github/actions/cache
-        id: cache
+    - name: Fetch fixtures
+      run: script/fetch-fixtures

-      - name: Fetch fixtures
-        run: cargo xtask fetch-fixtures
+    - name: Generate fixtures
+      run: script/generate-fixtures

-      - name: Generate fixtures
-        if: ${{ steps.cache.outputs.cache-hit != 'true' }}
-        run: cargo xtask generate-fixtures
+    - name: Run main tests with undefined behaviour sanitizer (UBSAN)
+      env:
+        UBSAN_OPTIONS: halt_on_error=1
+        CFLAGS: -fsanitize=undefined
+        RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan
+      run: cargo test -- --test-threads 1

-      - name: Run main tests with undefined behaviour sanitizer (UBSAN)
-        run: cargo test -- --test-threads 1
-        env:
-          CFLAGS: -fsanitize=undefined
-          RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan
-
-      - name: Run main tests with address sanitizer (ASAN)
-        run: cargo test -- --test-threads 1
-        env:
-          ASAN_OPTIONS: verify_asan_link_order=0
-          CFLAGS: -fsanitize=address
-          RUSTFLAGS: ${{ env.RUSTFLAGS }} -lasan --cfg sanitizing
+    - name: Run main tests with address sanitizer (ASAN)
+      env:
+        ASAN_OPTIONS: halt_on_error=1
+        CFLAGS: -fsanitize=address
+        RUSTFLAGS: ${{ env.RUSTFLAGS }} -Zsanitizer=address --cfg=sanitizing
+      run: |
+        rustup install nightly
+        rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
+        cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1
--- a/.github/workflows/spam.yml
+++ b/.github/workflows/spam.yml
@ -1,29 +0,0 @@
-name: Close as spam
-
-on:
-  issues:
-    types: [labeled]
-  pull_request_target:
-    types: [labeled]
-
-permissions:
-  issues: write
-  pull-requests: write
-
-jobs:
-  spam:
-    runs-on: ubuntu-latest
-    if: github.event.label.name == 'spam'
-    steps:
-      - name: Checkout script
-        uses: actions/checkout@v6
-        with:
-          sparse-checkout: .github/scripts/close_spam.js
-          sparse-checkout-cone-mode: false
-
-      - name: Run script
-        uses: actions/github-script@v8
-        with:
-          script: |
-            const script = require('./.github/scripts/close_spam.js')
-            await script({github, context})
--- a/.github/workflows/wasm_exports.yml
+++ b/.github/workflows/wasm_exports.yml
@ -1,41 +0,0 @@
-name: Check Wasm Exports
-
-on:
-  pull_request:
-    paths:
-      - lib/include/tree_sitter/api.h
-      - lib/binding_web/**
-      - xtask/src/**
-  push:
-    branches: [master]
-    paths:
-      - lib/include/tree_sitter/api.h
-      - lib/binding_rust/bindings.rs
-      - CMakeLists.txt
-
-jobs:
-  check-wasm-exports:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Set up stable Rust toolchain
-        uses: actions-rust-lang/setup-rust-toolchain@v1
-        with:
-          toolchain: stable
-
-      - name: Install wasm-objdump
-        run: sudo apt-get update -y && sudo apt-get install -y wabt
-
-      - name: Build C library (make)
-        run: make -j CFLAGS="$CFLAGS"
-        env:
-          CFLAGS: -g -Werror -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
-
-      - name: Build Wasm Library
-        working-directory: lib/binding_web
-        run: npm ci && npm run build:debug
-
-      - name: Check Wasm exports
-        run: cargo xtask check-wasm-exports
--- a/.github/workflows/wasm_stdlib.yml
+++ b/.github/workflows/wasm_stdlib.yml
@ -1,19 +0,0 @@
-name: Check Wasm Stdlib build
-
-on:
-  workflow_call:
-
-jobs:
-  check:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Check directory changes
-        uses: actions/github-script@v8
-        with:
-          script: |
-            const scriptPath = `${process.env.GITHUB_WORKSPACE}/.github/scripts/wasm_stdlib.js`;
-            const script = require(scriptPath);
-            return script({ github, context, core });
--- a/.gitignore
+++ b/.gitignore
@ -1,40 +1,30 @@
 log*.html
-.direnv

 .idea
 *.xcodeproj
 .vscode
 .cache
-.zig-cache
-.direnv

-profile*
 fuzz-results
-test/fuzz/out
+
+/tree-sitter.pc
 test/fixtures/grammars/*
 !test/fixtures/grammars/.gitkeep
-
+package-lock.json
 node_modules

 docs/assets/js/tree-sitter.js

-/tree-sitter.pc
-
 /target
 *.rs.bk
 *.a
 *.dylib
 *.so
 *.so.[0-9]*
-*.dll
 *.o
 *.obj
 *.exp
 *.lib
 *.wasm
 .swiftpm
-.build
-build
 zig-*
-
-/result
--- a/.zed/settings.json
+++ b/.zed/settings.json
@ -1,11 +0,0 @@
-{
-  "lsp": {
-    "rust-analyzer": {
-      "initialization_options": {
-        "cargo": {
-          "features": "all"
-        }
-      }
-    }
-  }
-}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,95 +0,0 @@
-cmake_minimum_required(VERSION 3.13)
-
-project(tree-sitter
-        VERSION "0.27.0"
-        DESCRIPTION "An incremental parsing system for programming tools"
-        HOMEPAGE_URL "https://tree-sitter.github.io/tree-sitter/"
-        LANGUAGES C)
-
-option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
-option(TREE_SITTER_FEATURE_WASM "Enable the Wasm feature" OFF)
-option(AMALGAMATED "Build using an amalgamated source" OFF)
-
-if(AMALGAMATED)
-  set(TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/lib/src/lib.c")
-else()
-  file(GLOB TS_SOURCE_FILES lib/src/*.c)
-  list(REMOVE_ITEM TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/lib/src/lib.c")
-endif()
-
-add_library(tree-sitter ${TS_SOURCE_FILES})
-
-target_include_directories(tree-sitter PRIVATE lib/src lib/src/wasm PUBLIC lib/include)
-
-if(MSVC)
-  target_compile_options(tree-sitter PRIVATE
-                         /wd4018 # disable 'signed/unsigned mismatch'
-                         /wd4232 # disable 'nonstandard extension used'
-                         /wd4244 # disable 'possible loss of data'
-                         /wd4267 # disable 'possible loss of data (size_t)'
-                         /wd4701 # disable 'potentially uninitialized local variable'
-                         /we4022 # treat 'incompatible types' as an error
-                         /W4)
-else()
-  target_compile_options(tree-sitter PRIVATE
-                         -Wall -Wextra -Wshadow -Wpedantic
-                         -Werror=incompatible-pointer-types)
-endif()
-
-if(TREE_SITTER_FEATURE_WASM)
-  if(NOT DEFINED CACHE{WASMTIME_INCLUDE_DIR})
-    message(CHECK_START "Looking for wasmtime headers")
-    find_path(WASMTIME_INCLUDE_DIR wasmtime.h
-              PATHS ENV DEP_WASMTIME_C_API_INCLUDE)
-    if(NOT WASMTIME_INCLUDE_DIR)
-      unset(WASMTIME_INCLUDE_DIR CACHE)
-      message(FATAL_ERROR "Could not find wasmtime headers.\nDid you forget to set CMAKE_INCLUDE_PATH?")
-    endif()
-    message(CHECK_PASS "found")
-  endif()
-
-  if(NOT DEFINED CACHE{WASMTIME_LIBRARY})
-    message(CHECK_START "Looking for wasmtime library")
-    find_library(WASMTIME_LIBRARY wasmtime)
-    if(NOT WASMTIME_LIBRARY)
-      unset(WASMTIME_LIBRARY CACHE)
-      message(FATAL_ERROR "Could not find wasmtime library.\nDid you forget to set CMAKE_LIBRARY_PATH?")
-    endif()
-    message(CHECK_PASS "found")
-  endif()
-
-  target_compile_definitions(tree-sitter PUBLIC TREE_SITTER_FEATURE_WASM)
-  target_include_directories(tree-sitter SYSTEM PRIVATE "${WASMTIME_INCLUDE_DIR}")
-  target_link_libraries(tree-sitter PUBLIC "${WASMTIME_LIBRARY}")
-  set_property(TARGET tree-sitter PROPERTY C_STANDARD_REQUIRED ON)
-
-  if(NOT BUILD_SHARED_LIBS)
-    if(WIN32)
-      target_compile_definitions(tree-sitter PRIVATE WASM_API_EXTERN= WASI_API_EXTERN=)
-      target_link_libraries(tree-sitter INTERFACE ws2_32 advapi32 userenv ntdll shell32 ole32 bcrypt)
-    elseif(NOT APPLE)
-      target_link_libraries(tree-sitter INTERFACE pthread dl m)
-    endif()
-  endif()
-endif()
-
-set_target_properties(tree-sitter
-                      PROPERTIES
-                      C_STANDARD 11
-                      C_VISIBILITY_PRESET hidden
-                      POSITION_INDEPENDENT_CODE ON
-                      SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
-                      DEFINE_SYMBOL "")
-
-target_compile_definitions(tree-sitter PRIVATE _POSIX_C_SOURCE=200112L _DEFAULT_SOURCE _BSD_SOURCE _DARWIN_C_SOURCE)
-
-include(GNUInstallDirs)
-
-configure_file(lib/tree-sitter.pc.in "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc" @ONLY)
-
-install(FILES lib/include/tree_sitter/api.h
-        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/tree_sitter")
-install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc"
-        DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
-install(TARGETS tree-sitter
-        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -1 +0,0 @@
-See [docs/src/6-contributing.md](./docs/src/6-contributing.md)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1 @@
+docs/section-6-contributing.md
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,85 +1,10 @@
 [workspace]
-default-members = ["crates/cli"]
-members = [
-  "crates/cli",
-  "crates/config",
-  "crates/generate",
-  "crates/highlight",
-  "crates/loader",
-  "crates/tags",
-  "crates/xtask",
-  "crates/language",
-  "lib",
-]
+default-members = ["cli"]
+members = ["cli", "cli/config", "cli/loader", "lib", "tags", "highlight"]
 resolver = "2"

 [workspace.package]
-version = "0.27.0"
-authors = [
-  "Max Brunsfeld <maxbrunsfeld@gmail.com>",
-  "Amaan Qureshi <amaanq12@gmail.com>",
-]
-edition = "2021"
-rust-version = "1.85"
-homepage = "https://tree-sitter.github.io/tree-sitter"
-repository = "https://github.com/tree-sitter/tree-sitter"
-license = "MIT"
-keywords = ["incremental", "parsing"]
-categories = ["command-line-utilities", "parsing"]
-
-[workspace.lints.clippy]
-dbg_macro = "deny"
-todo = "deny"
-pedantic = { level = "warn", priority = -1 }
-nursery = { level = "warn", priority = -1 }
-cargo = { level = "warn", priority = -1 }
-
-# The lints below are a specific subset of the pedantic+nursery lints
-# that we explicitly allow in the tree-sitter codebase because they either:
-#
-# 1. Contain false positives,
-# 2. Are unnecessary, or
-# 3. Worsen the code
-
-branches_sharing_code = "allow"
-cast_lossless = "allow"
-cast_possible_truncation = "allow"
-cast_possible_wrap = "allow"
-cast_precision_loss = "allow"
-cast_sign_loss = "allow"
-checked_conversions = "allow"
-cognitive_complexity = "allow"
-collection_is_never_read = "allow"
-fallible_impl_from = "allow"
-fn_params_excessive_bools = "allow"
-inline_always = "allow"
-if_not_else = "allow"
-items_after_statements = "allow"
-match_wildcard_for_single_variants = "allow"
-missing_errors_doc = "allow"
-missing_panics_doc = "allow"
-module_name_repetitions = "allow"
-multiple_crate_versions = "allow"
-needless_for_each = "allow"
-obfuscated_if_else = "allow"
-option_if_let_else = "allow"
-or_fun_call = "allow"
-range_plus_one = "allow"
-redundant_clone = "allow"
-redundant_closure_for_method_calls = "allow"
-ref_option = "allow"
-similar_names = "allow"
-string_lit_as_bytes = "allow"
-struct_excessive_bools = "allow"
-struct_field_names = "allow"
-transmute_undefined_repr = "allow"
-too_many_lines = "allow"
-unnecessary_wraps = "allow"
-unused_self = "allow"
-used_underscore_items = "allow"
-
-[workspace.lints.rust]
-mismatched_lifetime_syntaxes = "allow"
+rust-version = "1.70"

 [profile.optimize]
 inherits = "release"
@ -92,72 +17,52 @@ codegen-units = 1    # Maximum size reduction optimizations.
 inherits = "optimize"
 opt-level = "s"       # Optimize for size.

-[profile.release-dev]
-inherits = "release"
-lto = false
-debug = true
-debug-assertions = true
-overflow-checks = true
-incremental = true
-codegen-units = 256
+[profile.profile]
+inherits = "optimize"
+strip = false

 [workspace.dependencies]
-ansi_colours = "1.2.3"
-anstyle = "1.0.13"
-anyhow = "1.0.100"
-bstr = "1.12.0"
-cc = "1.2.53"
-clap = { version = "4.5.54", features = [
-  "cargo",
-  "derive",
-  "env",
-  "help",
-  "string",
-  "unstable-styles",
+ansi_term = "0.12.1"
+anstyle = "1.0.6"
+anyhow = "1.0.79"
+cc = "1.0.83"
+clap = { version = "4.4.18", features = [
+	"cargo",
+	"derive",
+	"env",
+	"help",
+	"unstable-styles",
 ] }
-clap_complete = "4.5.65"
-clap_complete_nushell = "4.5.10"
-crc32fast = "1.5.0"
-ctor = "0.2.9"
-ctrlc = { version = "3.5.0", features = ["termination"] }
-dialoguer = { version = "0.11.0", features = ["fuzzy-select"] }
-etcetera = "0.11.0"
-fs4 = "0.12.0"
-glob = "0.3.3"
-heck = "0.5.0"
+ctor = "0.2.6"
+ctrlc = { version = "3.4.2", features = ["termination"] }
+difference = "2.0.0"
+dirs = "5.0.1"
+glob = "0.3.1"
 html-escape = "0.2.13"
-indexmap = "2.12.1"
-indoc = "2.0.6"
-libloading = "0.9.0"
-log = { version = "0.4.28", features = ["std"] }
-memchr = "2.7.6"
-once_cell = "1.21.3"
-pretty_assertions = "1.4.1"
+indexmap = "2.2.2"
+indoc = "2.0.4"
+lazy_static = "1.4.0"
+libloading = "0.8.1"
+log = { version = "0.4.20", features = ["std"] }
+memchr = "2.7.1"
+once_cell = "1.19.0"
+path-slash = "0.2.1"
+pretty_assertions = "1.4.0"
 rand = "0.8.5"
-regex = "1.11.3"
-regex-syntax = "0.8.6"
-rustc-hash = "2.1.1"
-schemars = "1.0.5"
-semver = { version = "1.0.27", features = ["serde"] }
-serde = { version = "1.0.219", features = ["derive"] }
-serde_json = { version = "1.0.149", features = ["preserve_order"] }
-similar = "2.7.0"
-smallbitvec = "2.6.0"
-streaming-iterator = "0.1.9"
-tempfile = "3.23.0"
-thiserror = "2.0.17"
+regex = "1.10.3"
+regex-syntax = "0.8.2"
+rustc-hash = "1.1.0"
+semver = "1.0.21"
+serde = { version = "1.0.196", features = ["derive"] }
+serde_derive = "1.0.196"
+serde_json = { version = "1.0.113", features = ["preserve_order"] }
+smallbitvec = "2.5.1"
+tempfile = "3.10.0"
+thiserror = "1.0.56"
 tiny_http = "0.12.0"
-topological-sort = "0.2.2"
-unindent = "0.2.4"
-walkdir = "2.5.0"
-wasmparser = "0.243.0"
-webbrowser = "1.0.5"
-
-tree-sitter = { version = "0.27.0", path = "./lib" }
-tree-sitter-generate = { version = "0.27.0", path = "./crates/generate" }
-tree-sitter-loader = { version = "0.27.0", path = "./crates/loader" }
-tree-sitter-config = { version = "0.27.0", path = "./crates/config" }
-tree-sitter-highlight = { version = "0.27.0", path = "./crates/highlight" }
-tree-sitter-tags = { version = "0.27.0", path = "./crates/tags" }
-
-tree-sitter-language = { version = "0.1", path = "./crates/language" }
+toml = "0.8.10"
+unindent = "0.2.3"
+walkdir = "2.4.0"
+wasmparser = "0.121.0"
+webbrowser = "0.8.12"
+which = "6.0.0"
--- a/Dockerfile
+++ b/Dockerfile
@ -1,10 +0,0 @@
-FROM rust:1.76-buster
-
-WORKDIR /app
-
-RUN apt-get update
-RUN apt-get install -y nodejs
-
-COPY . .
-
-CMD cargo test --all-features
--- a/LICENSE
+++ b/LICENSE
@ -1,6 +1,6 @@
 The MIT License (MIT)

-Copyright (c) 2018 Max Brunsfeld
+Copyright (c) 2018-2023 Max Brunsfeld

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/Makefile
+++ b/Makefile
@ -1,12 +1,9 @@
-VERSION := 0.27.0
-DESCRIPTION := An incremental parsing system for programming tools
-HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/
+VERSION := 0.20.10

 # install directory layout
 PREFIX ?= /usr/local
 INCLUDEDIR ?= $(PREFIX)/include
 LIBDIR ?= $(PREFIX)/lib
-BINDIR ?= $(PREFIX)/bin
 PCLIBDIR ?= $(LIBDIR)/pkgconfig

 # collect sources
@ -21,119 +18,83 @@ endif
 OBJ := $(SRC:.c=.o)

 # define default flags, and override to append mandatory flags
-ARFLAGS := rcs
-CFLAGS ?= -O3 -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
-override CFLAGS += -std=c11 -fPIC -fvisibility=hidden
-override CFLAGS += -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_DARWIN_C_SOURCE
+override CFLAGS := -O3 -std=gnu11 -fPIC -fvisibility=hidden -Wall -Wextra -Wshadow -pedantic $(CFLAGS)
 override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include

 # ABI versioning
-SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
-SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))
+SONAME_MAJOR := 0
+SONAME_MINOR := 0

 # OS-specific bits
-MACHINE := $(shell $(CC) -dumpmachine)
-
-ifneq ($(findstring darwin,$(MACHINE)),)
+ifeq ($(shell uname),Darwin)
 	SOEXT = dylib
-	SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT)
-	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT)
-	LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SOEXTVER)
-else ifneq ($(findstring mingw32,$(MACHINE)),)
-	SOEXT = dll
-	LINKSHARED += -s -shared -Wl,--out-implib,libtree-sitter.dll.a
+	SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
+	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
+	LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib
 else
 	SOEXT = so
-	SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR)
-	SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR)
-	LINKSHARED += -shared -Wl,-soname,libtree-sitter.$(SOEXTVER)
-ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
+	SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
+	SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
+	LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR)
+endif
+ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
 	PCLIBDIR := $(PREFIX)/libdata/pkgconfig
 endif
-endif

-all: libtree-sitter.a libtree-sitter.$(SOEXT) tree-sitter.pc
+all: libtree-sitter.a libtree-sitter.$(SOEXTVER)

 libtree-sitter.a: $(OBJ)
-	$(AR) $(ARFLAGS) $@ $^
+	$(AR) rcs $@ $^

-libtree-sitter.$(SOEXT): $(OBJ)
+libtree-sitter.$(SOEXTVER): $(OBJ)
 	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
+	ln -sf $@ libtree-sitter.$(SOEXT)
+	ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif

-ifneq ($(findstring mingw32,$(MACHINE)),)
-libtree-sitter.dll.a: libtree-sitter.$(SOEXT)
-endif
+install: all
+	sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
+	    -e 's|=$(PREFIX)|=$${prefix}|' \
+	    -e 's|@PREFIX@|$(PREFIX)|' \
+	    tree-sitter.pc.in > tree-sitter.pc

-tree-sitter.pc: lib/tree-sitter.pc.in
-	sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \
-		-e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \
-		-e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \
-		-e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \
-		-e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \
-		-e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@
+	install -d '$(DESTDIR)$(LIBDIR)'
+	install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/
+	install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/
+	ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR)
+	ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)

-shared: libtree-sitter.$(SOEXT)
+	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
+	install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/

-static: libtree-sitter.a
+	install -d '$(DESTDIR)$(PCLIBDIR)'
+	install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/

 clean:
-	$(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) libtree-stitter.dll.a
+	rm -f lib/src/*.o tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER)

-install: all
-	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
-	install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h
-	install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
-	install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a
-ifneq ($(findstring mingw32,$(MACHINE)),)
-	install -d '$(DESTDIR)$(BINDIR)'
-	install -m755 libtree-sitter.dll '$(DESTDIR)$(BINDIR)'/libtree-sitter.dll
-	install -m755 libtree-sitter.dll.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.dll.a
-else
-	install -m755 libtree-sitter.$(SOEXT) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER)
-	cd '$(DESTDIR)$(LIBDIR)' && ln -sf libtree-sitter.$(SOEXTVER) libtree-sitter.$(SOEXTVER_MAJOR)
-	cd '$(DESTDIR)$(LIBDIR)' && ln -sf libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXT)
-endif
-
-uninstall:
-	$(RM) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a \
-		'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) \
-		'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) \
-		'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) \
-		'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h \
-		'$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
-	rmdir '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
-
-.PHONY: all shared static install uninstall clean
+.PHONY: all install clean


 ##### Dev targets #####

 test:
-	cargo xtask fetch-fixtures
-	cargo xtask generate-fixtures
-	cargo xtask test
+	script/fetch-fixtures
+	script/generate-fixtures
+	script/test

-test-wasm:
-	cargo xtask generate-fixtures --wasm
-	cargo xtask test-wasm
+test_wasm:
+	script/generate-fixtures-wasm
+	script/test-wasm

 lint:
-	cargo update --workspace --locked --quiet
 	cargo check --workspace --all-targets
 	cargo fmt --all --check
 	cargo clippy --workspace --all-targets -- -D warnings

-lint-web:
-	npm --prefix lib/binding_web ci
-	npm --prefix lib/binding_web run lint
-
 format:
 	cargo fmt --all

-changelog:
-	@git-cliff --config .github/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token)
-
-.PHONY: test test-wasm lint format changelog
+.PHONY: test test_wasm lint format
--- a/Package.swift
+++ b/Package.swift
@ -15,21 +15,25 @@ let package = Package(
        .target(name: "TreeSitter",
                path: "lib",
                exclude: [
-                        "src/unicode/ICU_SHA",
-                        "src/unicode/README.md",
-                        "src/unicode/LICENSE",
-                        "src/wasm/stdlib-symbols.txt",
-                        "src/lib.c",
+                    "binding_rust",
+                    "binding_web",
+                    "Cargo.toml",
+                    "README.md",
+                    "src/unicode/README.md",
+                    "src/unicode/LICENSE",
+                    "src/unicode/ICU_SHA",
+                    "src/get_changed_ranges.c",
+                    "src/tree_cursor.c",
+                    "src/stack.c",
+                    "src/node.c",
+                    "src/lexer.c",
+                    "src/parser.c",
+                    "src/language.c",
+                    "src/alloc.c",
+                    "src/subtree.c",
+                    "src/tree.c",
+                    "src/query.c"
                ],
-                sources: ["src"],
-                publicHeadersPath: "include",
-                cSettings: [
-                        .headerSearchPath("src"),
-                        .define("_POSIX_C_SOURCE", to: "200112L"),
-                        .define("_DEFAULT_SOURCE"),
-                        .define("_BSD_SOURCE"),
-                        .define("_DARWIN_C_SOURCE"),
-                ]),
-    ],
-    cLanguageStandard: .c11
+                sources: ["src/lib.c"]),
+    ]
 )
--- a/README.md
+++ b/README.md
@ -1,8 +1,6 @@
 # tree-sitter

 [![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618)
-[![discord][discord]](https://discord.gg/w7nTvsVJhm)
-[![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org)

 Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:

@ -12,10 +10,8 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca
 - **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application

 ## Links
+
 - [Documentation](https://tree-sitter.github.io)
 - [Rust binding](lib/binding_rust/README.md)
- [Wasm binding](lib/binding_web/README.md)
- [Command-line interface](crates/cli/README.md)
-
-[discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord
-[matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix
+- [WASM binding](lib/binding_web/README.md)
+- [Command-line interface](cli/README.md)
--- a/build.zig
+++ b/build.zig
@ -1,142 +1,16 @@
 const std = @import("std");

-pub fn build(b: *std.Build) !void {
-    const target = b.standardTargetOptions(.{});
-    const optimize = b.standardOptimizeOption(.{});
-
-    const wasm = b.option(bool, "enable-wasm", "Enable Wasm support") orelse false;
-    const shared = b.option(bool, "build-shared", "Build a shared library") orelse false;
-    const amalgamated = b.option(bool, "amalgamated", "Build using an amalgamated source") orelse false;
-
-    const lib: *std.Build.Step.Compile = b.addLibrary(.{
+pub fn build(b: *std.Build) void {
+    var lib = b.addStaticLibrary(.{
        .name = "tree-sitter",
-        .linkage = if (shared) .dynamic else .static,
-        .root_module = b.createModule(.{
-            .target = target,
-            .optimize = optimize,
-            .link_libc = true,
-            .pic = if (shared) true else null,
-        }),
+        .target = b.standardTargetOptions(.{}),
+        .optimize = b.standardOptimizeOption(.{}),
    });

-    if (amalgamated) {
-        lib.addCSourceFile(.{
-            .file = b.path("lib/src/lib.c"),
-            .flags = &.{"-std=c11"},
-        });
-    } else {
-        const files = try findSourceFiles(b);
-        defer b.allocator.free(files);
-        lib.addCSourceFiles(.{
-            .root = b.path("lib/src"),
-            .files = files,
-            .flags = &.{"-std=c11"},
-        });
-    }
-
-    lib.addIncludePath(b.path("lib/include"));
-    lib.addIncludePath(b.path("lib/src"));
-    lib.addIncludePath(b.path("lib/src/wasm"));
-
-    lib.root_module.addCMacro("_POSIX_C_SOURCE", "200112L");
-    lib.root_module.addCMacro("_DEFAULT_SOURCE", "");
-    lib.root_module.addCMacro("_BSD_SOURCE", "");
-    lib.root_module.addCMacro("_DARWIN_C_SOURCE", "");
-
-    if (wasm) {
-        if (b.lazyDependency(wasmtimeDep(target.result), .{})) |wasmtime| {
-            lib.root_module.addCMacro("TREE_SITTER_FEATURE_WASM", "");
-            lib.addSystemIncludePath(wasmtime.path("include"));
-            lib.addLibraryPath(wasmtime.path("lib"));
-            if (shared) lib.linkSystemLibrary("wasmtime");
-        }
-    }
-
-    lib.installHeadersDirectory(b.path("lib/include"), ".", .{});
+    lib.linkLibC();
+    lib.addCSourceFile(.{ .file = .{ .path = "lib/src/lib.c" }, .flags = &.{} });
+    lib.addIncludePath(.{ .path = "lib/include" });
+    lib.addIncludePath(.{ .path = "lib/src" });

    b.installArtifact(lib);
 }
-
-/// Get the name of the wasmtime dependency for this target.
-pub fn wasmtimeDep(target: std.Target) []const u8 {
-    const arch = target.cpu.arch;
-    const os = target.os.tag;
-    const abi = target.abi;
-    return @as(?[]const u8, switch (os) {
-        .linux => switch (arch) {
-            .x86_64 => switch (abi) {
-                .gnu => "wasmtime_c_api_x86_64_linux",
-                .musl => "wasmtime_c_api_x86_64_musl",
-                .android => "wasmtime_c_api_x86_64_android",
-                else => null,
-            },
-            .aarch64 => switch (abi) {
-                .gnu => "wasmtime_c_api_aarch64_linux",
-                .musl => "wasmtime_c_api_aarch64_musl",
-                .android => "wasmtime_c_api_aarch64_android",
-                else => null,
-            },
-            .x86 => switch (abi) {
-                .gnu => "wasmtime_c_api_i686_linux",
-                else => null,
-            },
-            .arm => switch (abi) {
-                .gnueabi => "wasmtime_c_api_armv7_linux",
-                else => null,
-            },
-            .s390x => switch (abi) {
-                .gnu => "wasmtime_c_api_s390x_linux",
-                else => null,
-            },
-            .riscv64 => switch (abi) {
-                .gnu => "wasmtime_c_api_riscv64gc_linux",
-                else => null,
-            },
-            else => null,
-        },
-        .windows => switch (arch) {
-            .x86_64 => switch (abi) {
-                .gnu => "wasmtime_c_api_x86_64_mingw",
-                .msvc => "wasmtime_c_api_x86_64_windows",
-                else => null,
-            },
-            .aarch64 => switch (abi) {
-                .msvc => "wasmtime_c_api_aarch64_windows",
-                else => null,
-            },
-            .x86 => switch (abi) {
-                .msvc => "wasmtime_c_api_i686_windows",
-                else => null,
-            },
-            else => null,
-        },
-        .macos => switch (arch) {
-            .x86_64 => "wasmtime_c_api_x86_64_macos",
-            .aarch64 => "wasmtime_c_api_aarch64_macos",
-            else => null,
-        },
-        else => null,
-    }) orelse std.debug.panic(
-        "Unsupported target for wasmtime: {s}-{s}-{s}",
-        .{ @tagName(arch), @tagName(os), @tagName(abi) },
-    );
-}
-
-fn findSourceFiles(b: *std.Build) ![]const []const u8 {
-    var sources: std.ArrayListUnmanaged([]const u8) = .empty;
-
-    var dir = try b.build_root.handle.openDir("lib/src", .{ .iterate = true });
-    var iter = dir.iterate();
-    defer dir.close();
-
-    while (try iter.next()) |entry| {
-        if (entry.kind != .file) continue;
-        const file = entry.name;
-        const ext = std.fs.path.extension(file);
-        if (std.mem.eql(u8, ext, ".c") and !std.mem.eql(u8, file, "lib.c")) {
-            try sources.append(b.allocator, b.dupe(file));
-        }
-    }
-
-    return sources.toOwnedSlice(b.allocator);
-}
--- a/build.zig.zon
+++ b/build.zig.zon
@ -1,96 +0,0 @@
-.{
-    .name = .tree_sitter,
-    .fingerprint = 0x841224b447ac0d4f,
-    .version = "0.27.0",
-    .minimum_zig_version = "0.14.1",
-    .paths = .{
-        "build.zig",
-        "build.zig.zon",
-        "lib/src",
-        "lib/include",
-        "README.md",
-        "LICENSE",
-    },
-    .dependencies = .{
-        .wasmtime_c_api_aarch64_android = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-android-c-api.tar.xz",
-            .hash = "N-V-__8AAIfPIgdw2YnV3QyiFQ2NHdrxrXzzCdjYJyxJDOta",
-            .lazy = true,
-        },
-        .wasmtime_c_api_aarch64_linux = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-linux-c-api.tar.xz",
-            .hash = "N-V-__8AAIt97QZi7Pf7nNJ2mVY6uxA80Klyuvvtop3pLMRK",
-            .lazy = true,
-        },
-        .wasmtime_c_api_aarch64_macos = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-macos-c-api.tar.xz",
-            .hash = "N-V-__8AAAO48QQf91w9RmmUDHTja8DrXZA1n6Bmc8waW3qe",
-            .lazy = true,
-        },
-        .wasmtime_c_api_aarch64_musl = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-musl-c-api.tar.xz",
-            .hash = "N-V-__8AAI196wa9pwADoA2RbCDp5F7bKQg1iOPq6gIh8-FH",
-            .lazy = true,
-        },
-        .wasmtime_c_api_aarch64_windows = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-aarch64-windows-c-api.zip",
-            .hash = "N-V-__8AAC9u4wXfqd1Q6XyQaC8_DbQZClXux60Vu5743N05",
-            .lazy = true,
-        },
-        .wasmtime_c_api_armv7_linux = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-armv7-linux-c-api.tar.xz",
-            .hash = "N-V-__8AAHXe8gWs3s83Cc5G6SIq0_jWxj8fGTT5xG4vb6-x",
-            .lazy = true,
-        },
-        .wasmtime_c_api_i686_linux = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-i686-linux-c-api.tar.xz",
-            .hash = "N-V-__8AAN2pzgUUfulRCYnipSfis9IIYHoTHVlieLRmKuct",
-            .lazy = true,
-        },
-        .wasmtime_c_api_i686_windows = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-i686-windows-c-api.zip",
-            .hash = "N-V-__8AAJu0YAUUTFBLxFIOi-MSQVezA6MMkpoFtuaf2Quf",
-            .lazy = true,
-        },
-        .wasmtime_c_api_riscv64gc_linux = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-riscv64gc-linux-c-api.tar.xz",
-            .hash = "N-V-__8AAG8m-gc3E3AIImtTZ3l1c7HC6HUWazQ9OH5KACX4",
-            .lazy = true,
-        },
-        .wasmtime_c_api_s390x_linux = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-s390x-linux-c-api.tar.xz",
-            .hash = "N-V-__8AAH314gd-gE4IBp2uvAL3gHeuW1uUZjMiLLeUdXL_",
-            .lazy = true,
-        },
-        .wasmtime_c_api_x86_64_android = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-android-c-api.tar.xz",
-            .hash = "N-V-__8AAIPNRwfNkznebrcGb0IKUe7f35bkuZEYOjcx6q3f",
-            .lazy = true,
-        },
-        .wasmtime_c_api_x86_64_linux = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-linux-c-api.tar.xz",
-            .hash = "N-V-__8AAI8EDwcyTtk_Afhk47SEaqfpoRqGkJeZpGs69ChF",
-            .lazy = true,
-        },
-        .wasmtime_c_api_x86_64_macos = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-macos-c-api.tar.xz",
-            .hash = "N-V-__8AAGtGNgVaOpHSxC22IjrampbRIy6lLwscdcAE8nG1",
-            .lazy = true,
-        },
-        .wasmtime_c_api_x86_64_mingw = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-mingw-c-api.zip",
-            .hash = "N-V-__8AAPS2PAbVix50L6lnddlgazCPTz3whLUFk1qnRtnZ",
-            .lazy = true,
-        },
-        .wasmtime_c_api_x86_64_musl = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-musl-c-api.tar.xz",
-            .hash = "N-V-__8AAF-WEQe0nzvi09PgusM5i46FIuCKJmIDWUleWgQ3",
-            .lazy = true,
-        },
-        .wasmtime_c_api_x86_64_windows = .{
-            .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v33.0.2/wasmtime-v33.0.2-x86_64-windows-c-api.zip",
-            .hash = "N-V-__8AAKGNXwbpJQsn0_6kwSIVDDWifSg8cBzf7T2RzsC9",
-            .lazy = true,
-        },
-    },
-}
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@ -1,24 +1,15 @@
 [package]
 name = "tree-sitter-cli"
-version.workspace = true
 description = "CLI tool for developing, testing, and using Tree-sitter parsers"
-authors.workspace = true
-edition.workspace = true
-rust-version.workspace = true
+version = "0.20.9"
+authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
+edition = "2021"
+license = "MIT"
 readme = "README.md"
-homepage.workspace = true
-repository.workspace = true
-documentation = "https://docs.rs/tree-sitter-cli"
-license.workspace = true
-keywords.workspace = true
-categories.workspace = true
-include = ["build.rs", "README.md", "LICENSE", "benches/*", "src/**"]
-
-[lints]
-workspace = true
-
-[lib]
-path = "src/tree_sitter_cli.rs"
+keywords = ["incremental", "parsing"]
+categories = ["command-line-utilities", "parsing"]
+repository = "https://github.com/tree-sitter/tree-sitter"
+rust-version.workspace = true

 [[bin]]
 name = "tree-sitter"
@ -30,54 +21,67 @@ name = "benchmark"
 harness = false

 [features]
-default = ["qjs-rt"]
 wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"]
-qjs-rt = ["tree-sitter-generate/qjs-rt"]

 [dependencies]
-ansi_colours.workspace = true
+ansi_term.workspace = true
 anstyle.workspace = true
 anyhow.workspace = true
-bstr.workspace = true
 clap.workspace = true
-clap_complete.workspace = true
-clap_complete_nushell.workspace = true
-crc32fast.workspace = true
-ctor.workspace = true
 ctrlc.workspace = true
-dialoguer.workspace = true
+difference.workspace = true
+dirs.workspace = true
 glob.workspace = true
-heck.workspace = true
 html-escape.workspace = true
-indoc.workspace = true
+indexmap.workspace = true
+lazy_static.workspace = true
 log.workspace = true
 memchr.workspace = true
-rand.workspace = true
+path-slash.workspace = true
 regex.workspace = true
-schemars.workspace = true
+regex-syntax.workspace = true
+rustc-hash.workspace = true
 semver.workspace = true
 serde.workspace = true
+serde_derive.workspace = true
 serde_json.workspace = true
-similar.workspace = true
-streaming-iterator.workspace = true
-thiserror.workspace = true
+smallbitvec.workspace = true
 tiny_http.workspace = true
+toml.workspace = true
 walkdir.workspace = true
 wasmparser.workspace = true
 webbrowser.workspace = true
+which.workspace = true

-tree-sitter.workspace = true
-tree-sitter-generate.workspace = true
-tree-sitter-config.workspace = true
-tree-sitter-highlight.workspace = true
-tree-sitter-loader.workspace = true
-tree-sitter-tags.workspace = true
+[dependencies.tree-sitter]
+version = "0.20.10"
+path = "../lib"
+
+[dependencies.tree-sitter-config]
+version = "0.19.0"
+path = "config"
+
+[dependencies.tree-sitter-highlight]
+version = "0.20.2"
+path = "../highlight"
+
+[dependencies.tree-sitter-loader]
+version = "0.20.0"
+path = "loader"
+
+[dependencies.tree-sitter-tags]
+version = "0.20.2"
+path = "../tags"

 [dev-dependencies]
-encoding_rs = "0.8.35"
-widestring = "1.2.1"
 tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }

+rand.workspace = true
 tempfile.workspace = true
 pretty_assertions.workspace = true
+ctor.workspace = true
 unindent.workspace = true
+indoc.workspace = true
+
+[build-dependencies]
+toml.workspace = true
--- a/crates/cli/README.md
+++ b/crates/cli/README.md
@ -7,15 +7,14 @@
 [npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
 [npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A

-The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`,
-`Linux`, and `Windows`.
+The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on MacOS, Linux, and Windows.

 ### Installation

 You can install the `tree-sitter-cli` with `cargo`:

 ```sh
-cargo install --locked tree-sitter-cli
+cargo install tree-sitter-cli
 ```

 or with `npm`:
@ -35,11 +34,9 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have

 ### Commands

-* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current
-  working directory. See [the documentation] for more information.
+* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.

-* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory.
-  See [the documentation] for more information.
+* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.

 * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.

--- a/crates/cli/benches/benchmark.rs
+++ b/crates/cli/benches/benchmark.rs
@ -1,79 +1,68 @@
-use std::{
-    collections::BTreeMap,
-    env, fs,
-    path::{Path, PathBuf},
-    str,
-    sync::LazyLock,
-    time::Instant,
-};
-
 use anyhow::Context;
-use log::info;
+use lazy_static::lazy_static;
+use std::collections::BTreeMap;
+use std::path::{Path, PathBuf};
+use std::time::Instant;
+use std::{env, fs, str, usize};
 use tree_sitter::{Language, Parser, Query};
-use tree_sitter_loader::{CompileConfig, Loader};
+use tree_sitter_loader::Loader;

 include!("../src/tests/helpers/dirs.rs");

-static LANGUAGE_FILTER: LazyLock<Option<String>> =
-    LazyLock::new(|| env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok());
-static EXAMPLE_FILTER: LazyLock<Option<String>> =
-    LazyLock::new(|| env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok());
-static REPETITION_COUNT: LazyLock<usize> = LazyLock::new(|| {
-    env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
+lazy_static! {
+    static ref LANGUAGE_FILTER: Option<String> =
+        env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok();
+    static ref EXAMPLE_FILTER: Option<String> =
+        env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok();
+    static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
        .map(|s| s.parse::<usize>().unwrap())
-        .unwrap_or(5)
-});
-static TEST_LOADER: LazyLock<Loader> =
-    LazyLock::new(|| Loader::with_parser_lib_path(SCRATCH_DIR.clone()));
+        .unwrap_or(5);
+    static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
+    static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)> = {
+        fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
+            if dir.join("grammar.js").exists() {
+                let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
+                let (example_paths, query_paths) =
+                    result.entry(relative_path.to_owned()).or_default();

-#[allow(clippy::type_complexity)]
-static EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: LazyLock<
-    BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>,
-> = LazyLock::new(|| {
-    fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
-        if dir.join("grammar.js").exists() {
-            let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
-            let (example_paths, query_paths) = result.entry(relative_path.to_owned()).or_default();
+                if let Ok(example_files) = fs::read_dir(dir.join("examples")) {
+                    example_paths.extend(example_files.filter_map(|p| {
+                        let p = p.unwrap().path();
+                        if p.is_file() {
+                            Some(p)
+                        } else {
+                            None
+                        }
+                    }));
+                }

-            if let Ok(example_files) = fs::read_dir(dir.join("examples")) {
-                example_paths.extend(example_files.filter_map(|p| {
-                    let p = p.unwrap().path();
-                    if p.is_file() {
-                        Some(p)
-                    } else {
-                        None
+                if let Ok(query_files) = fs::read_dir(dir.join("queries")) {
+                    query_paths.extend(query_files.filter_map(|p| {
+                        let p = p.unwrap().path();
+                        if p.is_file() {
+                            Some(p)
+                        } else {
+                            None
+                        }
+                    }));
+                }
+            } else {
+                for entry in fs::read_dir(dir).unwrap() {
+                    let entry = entry.unwrap().path();
+                    if entry.is_dir() {
+                        process_dir(result, &entry);
                    }
-                }));
-            }
-
-            if let Ok(query_files) = fs::read_dir(dir.join("queries")) {
-                query_paths.extend(query_files.filter_map(|p| {
-                    let p = p.unwrap().path();
-                    if p.is_file() {
-                        Some(p)
-                    } else {
-                        None
-                    }
-                }));
-            }
-        } else {
-            for entry in fs::read_dir(dir).unwrap() {
-                let entry = entry.unwrap().path();
-                if entry.is_dir() {
-                    process_dir(result, &entry);
                }
            }
        }
-    }

-    let mut result = BTreeMap::new();
-    process_dir(&mut result, &GRAMMARS_DIR);
-    result
-});
+        let mut result = BTreeMap::new();
+        process_dir(&mut result, &GRAMMARS_DIR);
+        result
+    };
+}

 fn main() {
-    tree_sitter_cli::logger::init();
-
    let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR
        .values()
        .flat_map(|(e, q)| {
@ -84,7 +73,7 @@ fn main() {
        .max()
        .unwrap_or(0);

-    info!("Benchmarking with {} repetitions", *REPETITION_COUNT);
+    eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT);

    let mut parser = Parser::new();
    let mut all_normal_speeds = Vec::new();
@ -101,11 +90,11 @@ fn main() {
            }
        }

-        info!("\nLanguage: {language_name}");
+        eprintln!("\nLanguage: {language_name}");
        let language = get_language(language_path);
        parser.set_language(&language).unwrap();

-        info!("  Constructing Queries");
+        eprintln!("  Constructing Queries");
        for path in query_paths {
            if let Some(filter) = EXAMPLE_FILTER.as_ref() {
                if !path.to_str().unwrap().contains(filter.as_str()) {
@ -115,12 +104,12 @@ fn main() {

            parse(path, max_path_length, |source| {
                Query::new(&language, str::from_utf8(source).unwrap())
-                    .with_context(|| format!("Query file path: {}", path.display()))
+                    .with_context(|| format!("Query file path: {path:?}"))
                    .expect("Failed to parse query");
            });
        }

-        info!("  Parsing Valid Code:");
+        eprintln!("  Parsing Valid Code:");
        let mut normal_speeds = Vec::new();
        for example_path in example_paths {
            if let Some(filter) = EXAMPLE_FILTER.as_ref() {
@ -134,7 +123,7 @@ fn main() {
            }));
        }

-        info!("  Parsing Invalid Code (mismatched languages):");
+        eprintln!("  Parsing Invalid Code (mismatched languages):");
        let mut error_speeds = Vec::new();
        for (other_language_path, (example_paths, _)) in
            EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
@ -155,30 +144,30 @@ fn main() {
        }

        if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) {
-            info!("  Average Speed (normal): {average_normal} bytes/ms");
-            info!("  Worst Speed (normal):   {worst_normal} bytes/ms");
+            eprintln!("  Average Speed (normal): {average_normal} bytes/ms");
+            eprintln!("  Worst Speed (normal):   {worst_normal} bytes/ms");
        }

        if let Some((average_error, worst_error)) = aggregate(&error_speeds) {
-            info!("  Average Speed (errors): {average_error} bytes/ms");
-            info!("  Worst Speed (errors):   {worst_error} bytes/ms");
+            eprintln!("  Average Speed (errors): {average_error} bytes/ms");
+            eprintln!("  Worst Speed (errors):   {worst_error} bytes/ms");
        }

        all_normal_speeds.extend(normal_speeds);
        all_error_speeds.extend(error_speeds);
    }

-    info!("\n  Overall");
+    eprintln!("\n  Overall");
    if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) {
-        info!("  Average Speed (normal): {average_normal} bytes/ms");
-        info!("  Worst Speed (normal):   {worst_normal} bytes/ms");
+        eprintln!("  Average Speed (normal): {average_normal} bytes/ms");
+        eprintln!("  Worst Speed (normal):   {worst_normal} bytes/ms");
    }

    if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) {
-        info!("  Average Speed (errors): {average_error} bytes/ms");
-        info!("  Worst Speed (errors):   {worst_error} bytes/ms");
+        eprintln!("  Average Speed (errors): {average_error} bytes/ms");
+        eprintln!("  Worst Speed (errors):   {worst_error} bytes/ms");
    }
-    info!("");
+    eprintln!();
 }

 fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> {
@ -197,8 +186,14 @@ fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> {
 }

 fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize {
+    eprint!(
+        "    {:width$}\t",
+        path.file_name().unwrap().to_str().unwrap(),
+        width = max_path_length
+    );
+
    let source_code = fs::read(path)
-        .with_context(|| format!("Failed to read {}", path.display()))
+        .with_context(|| format!("Failed to read {path:?}"))
        .unwrap();
    let time = Instant::now();
    for _ in 0..*REPETITION_COUNT {
@ -207,18 +202,17 @@ fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) ->
    let duration = time.elapsed() / (*REPETITION_COUNT as u32);
    let duration_ns = duration.as_nanos();
    let speed = ((source_code.len() as u128) * 1_000_000) / duration_ns;
-    info!(
-        "    {:max_path_length$}\ttime {:>7.2} ms\t\tspeed {speed:>6} bytes/ms",
-        path.file_name().unwrap().to_str().unwrap(),
+    eprintln!(
+        "time {:>7.2} ms\t\tspeed {speed:>6} bytes/ms",
        (duration_ns as f64) / 1e6,
    );
    speed as usize
 }

 fn get_language(path: &Path) -> Language {
-    let src_path = GRAMMARS_DIR.join(path).join("src");
+    let src_dir = GRAMMARS_DIR.join(path).join("src");
    TEST_LOADER
-        .load_language_at_path(CompileConfig::new(&src_path, None, None))
-        .with_context(|| format!("Failed to load language at path {}", src_path.display()))
+        .load_language_at_path(&src_dir, &[&src_dir])
+        .with_context(|| format!("Failed to load language at path {src_dir:?}"))
        .unwrap()
 }
--- a/cli/build.rs
+++ b/cli/build.rs
@ -0,0 +1,116 @@
+use std::ffi::OsStr;
+use std::path::{Path, PathBuf};
+use std::{env, fs};
+
+fn main() {
+    if let Some(git_sha) = read_git_sha() {
+        println!("cargo:rustc-env=BUILD_SHA={git_sha}");
+    }
+
+    if web_playground_files_present() {
+        println!("cargo:rustc-cfg=TREE_SITTER_EMBED_WASM_BINDING");
+    }
+
+    let rust_binding_version = read_rust_binding_version();
+    println!("cargo:rustc-env=RUST_BINDING_VERSION={rust_binding_version}");
+}
+
+fn web_playground_files_present() -> bool {
+    let paths = [
+        "../docs/assets/js/playground.js",
+        "../lib/binding_web/tree-sitter.js",
+        "../lib/binding_web/tree-sitter.wasm",
+    ];
+
+    paths.iter().all(|p| Path::new(p).exists())
+}
+
+fn read_git_sha() -> Option<String> {
+    let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
+
+    let mut git_path;
+    loop {
+        git_path = repo_path.join(".git");
+        if git_path.exists() {
+            break;
+        } else if !repo_path.pop() {
+            return None;
+        }
+    }
+
+    let git_dir_path;
+    if git_path.is_dir() {
+        git_dir_path = git_path;
+    } else if let Ok(git_path_content) = fs::read_to_string(&git_path) {
+        git_dir_path = repo_path.join(git_path_content.get("gitdir: ".len()..).unwrap().trim_end());
+    } else {
+        return None;
+    }
+    let git_head_path = git_dir_path.join("HEAD");
+    if let Some(path) = git_head_path.to_str() {
+        println!("cargo:rerun-if-changed={path}");
+    }
+    if let Ok(mut head_content) = fs::read_to_string(&git_head_path) {
+        if head_content.ends_with('\n') {
+            head_content.pop();
+        }
+
+        // If we're on a branch, read the SHA from the ref file.
+        if head_content.starts_with("ref: ") {
+            head_content.replace_range(0.."ref: ".len(), "");
+            let ref_filename = {
+                // Go to real non-worktree gitdir
+                let git_dir_path = git_dir_path
+                    .parent()
+                    .and_then(|p| {
+                        p.file_name()
+                            .map(|n| n == OsStr::new("worktrees"))
+                            .and_then(|x| x.then(|| p.parent()))
+                    })
+                    .flatten()
+                    .unwrap_or(&git_dir_path);
+
+                let file = git_dir_path.join(&head_content);
+                if file.is_file() {
+                    file
+                } else {
+                    let packed_refs = git_dir_path.join("packed-refs");
+                    if let Ok(packed_refs_content) = fs::read_to_string(&packed_refs) {
+                        for line in packed_refs_content.lines() {
+                            if let Some((hash, r#ref)) = line.split_once(' ') {
+                                if r#ref == head_content {
+                                    if let Some(path) = packed_refs.to_str() {
+                                        println!("cargo:rerun-if-changed={path}");
+                                    }
+                                    return Some(hash.to_string());
+                                }
+                            }
+                        }
+                    }
+                    return None;
+                }
+            };
+            if let Some(path) = ref_filename.to_str() {
+                println!("cargo:rerun-if-changed={path}");
+            }
+            return fs::read_to_string(&ref_filename).ok();
+        }
+        // If we're on a detached commit, then the `HEAD` file itself contains the sha.
+        else if head_content.len() == 40 {
+            return Some(head_content);
+        }
+    }
+
+    None
+}
+
+fn read_rust_binding_version() -> String {
+    let path = "Cargo.toml";
+    let text = fs::read_to_string(path).unwrap();
+    let cargo_toml = toml::from_str::<toml::Value>(text.as_ref()).unwrap();
+    cargo_toml["dependencies"]["tree-sitter"]["version"]
+        .as_str()
+        .unwrap()
+        .trim_matches('"')
+        .to_string()
+}
--- a/cli/config/Cargo.toml
+++ b/cli/config/Cargo.toml
@ -0,0 +1,18 @@
+[package]
+name = "tree-sitter-config"
+description = "User configuration of tree-sitter's command line programs"
+version = "0.19.0"
+authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
+edition = "2021"
+license = "MIT"
+readme = "README.md"
+keywords = ["incremental", "parsing"]
+categories = ["command-line-utilities", "parsing"]
+repository = "https://github.com/tree-sitter/tree-sitter"
+rust-version.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+dirs.workspace = true
+serde.workspace = true
+serde_json.workspace = true
--- a/crates/config/README.md
+++ b/crates/config/README.md
--- a/crates/config/src/tree_sitter_config.rs
+++ b/crates/config/src/tree_sitter_config.rs
@ -1,54 +1,10 @@
-#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
+#![doc = include_str!("../README.md")]

-use std::{
-    env, fs,
-    path::{Path, PathBuf},
-};
-
-use etcetera::BaseStrategy as _;
-use log::warn;
+use anyhow::{anyhow, Context, Result};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
-use thiserror::Error;
-
-pub type ConfigResult<T> = Result<T, ConfigError>;
-
-#[derive(Debug, Error)]
-pub enum ConfigError {
-    #[error("Bad JSON config {0} -- {1}")]
-    ConfigRead(String, serde_json::Error),
-    #[error(transparent)]
-    HomeDir(#[from] etcetera::HomeDirError),
-    #[error(transparent)]
-    IO(IoError),
-    #[error(transparent)]
-    Serialization(#[from] serde_json::Error),
-}
-
-#[derive(Debug, Error)]
-pub struct IoError {
-    pub error: std::io::Error,
-    pub path: Option<String>,
-}
-
-impl IoError {
-    fn new(error: std::io::Error, path: Option<&Path>) -> Self {
-        Self {
-            error,
-            path: path.map(|p| p.to_string_lossy().to_string()),
-        }
-    }
-}
-
-impl std::fmt::Display for IoError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.error)?;
-        if let Some(ref path) = self.path {
-            write!(f, " ({path})")?;
-        }
-        Ok(())
-    }
-}
+use std::path::PathBuf;
+use std::{env, fs};

 /// Holds the contents of tree-sitter's configuration file.
 ///
@ -65,7 +21,7 @@ pub struct Config {
 }

 impl Config {
-    pub fn find_config_file() -> ConfigResult<Option<PathBuf>> {
+    pub fn find_config_file() -> Result<Option<PathBuf>> {
        if let Ok(path) = env::var("TREE_SITTER_DIR") {
            let mut path = PathBuf::from(path);
            path.push("config.json");
@ -82,28 +38,8 @@ impl Config {
            return Ok(Some(xdg_path));
        }

-        if cfg!(target_os = "macos") {
-            let legacy_apple_path = etcetera::base_strategy::Apple::new()?
-                .data_dir() // `$HOME/Library/Application Support/`
-                .join("tree-sitter")
-                .join("config.json");
-            if legacy_apple_path.is_file() {
-                let xdg_dir = xdg_path.parent().unwrap();
-                fs::create_dir_all(xdg_dir)
-                    .map_err(|e| ConfigError::IO(IoError::new(e, Some(xdg_dir))))?;
-                fs::rename(&legacy_apple_path, &xdg_path).map_err(|e| {
-                    ConfigError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
-                })?;
-                warn!(
-                    "Your config.json file has been automatically migrated from \"{}\" to \"{}\"",
-                    legacy_apple_path.display(),
-                    xdg_path.display()
-                );
-                return Ok(Some(xdg_path));
-            }
-        }
-
-        let legacy_path = etcetera::home_dir()?
+        let legacy_path = dirs::home_dir()
+            .ok_or_else(|| anyhow!("Cannot determine home directory"))?
            .join(".tree-sitter")
            .join("config.json");
        if legacy_path.is_file() {
@ -113,9 +49,9 @@ impl Config {
        Ok(None)
    }

-    fn xdg_config_file() -> ConfigResult<PathBuf> {
-        let xdg_path = etcetera::choose_base_strategy()?
-            .config_dir()
+    fn xdg_config_file() -> Result<PathBuf> {
+        let xdg_path = dirs::config_dir()
+            .ok_or_else(|| anyhow!("Cannot determine config directory"))?
            .join("tree-sitter")
            .join("config.json");
        Ok(xdg_path)
@ -124,25 +60,19 @@ impl Config {
    /// Locates and loads in the user's configuration file.  We search for the configuration file
    /// in the following locations, in order:
    ///
-    ///   - Location specified by the path parameter if provided
    ///   - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set
-    ///   - `tree-sitter/config.json` in your default user configuration directory, as determined by
-    ///     [`etcetera::choose_base_strategy`](https://docs.rs/etcetera/*/etcetera/#basestrategy)
+    ///   - `tree-sitter/config.json` in your default user configuration directory, as determined
+    ///     by [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html)
    ///   - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store
    ///     its configuration
-    pub fn load(path: Option<PathBuf>) -> ConfigResult<Self> {
-        let location = if let Some(path) = path {
-            path
-        } else if let Some(path) = Self::find_config_file()? {
-            path
-        } else {
+    pub fn load() -> Result<Self> {
+        let Some(location) = Self::find_config_file()? else {
            return Self::initial();
        };
-
        let content = fs::read_to_string(&location)
-            .map_err(|e| ConfigError::IO(IoError::new(e, Some(location.as_path()))))?;
+            .with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
        let config = serde_json::from_str(&content)
-            .map_err(|e| ConfigError::ConfigRead(location.to_string_lossy().to_string(), e))?;
+            .with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
        Ok(Self { location, config })
    }

@ -152,7 +82,7 @@ impl Config {
    /// disk.
    ///
    /// (Note that this is typically only done by the `tree-sitter init-config` command.)
-    pub fn initial() -> ConfigResult<Self> {
+    pub fn initial() -> Result<Self> {
        let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
            let mut path = PathBuf::from(path);
            path.push("config.json");
@ -165,20 +95,17 @@ impl Config {
    }

    /// Saves this configuration to the file that it was originally loaded from.
-    pub fn save(&self) -> ConfigResult<()> {
+    pub fn save(&self) -> Result<()> {
        let json = serde_json::to_string_pretty(&self.config)?;
-        let config_dir = self.location.parent().unwrap();
-        fs::create_dir_all(config_dir)
-            .map_err(|e| ConfigError::IO(IoError::new(e, Some(config_dir))))?;
-        fs::write(&self.location, json)
-            .map_err(|e| ConfigError::IO(IoError::new(e, Some(self.location.as_path()))))?;
+        fs::create_dir_all(self.location.parent().unwrap())?;
+        fs::write(&self.location, json)?;
        Ok(())
    }

    /// Parses a component-specific configuration from the configuration file.  The type `C` must
    /// be [deserializable](https://docs.rs/serde/*/serde/trait.Deserialize.html) from a JSON
    /// object, and must only include the fields relevant to that component.
-    pub fn get<C>(&self) -> ConfigResult<C>
+    pub fn get<C>(&self) -> Result<C>
    where
        C: for<'de> Deserialize<'de>,
    {
@ -189,7 +116,7 @@ impl Config {
    /// Adds a component-specific configuration to the configuration file.  The type `C` must be
    /// [serializable](https://docs.rs/serde/*/serde/trait.Serialize.html) into a JSON object, and
    /// must only include the fields relevant to that component.
-    pub fn add<C>(&mut self, config: C) -> ConfigResult<()>
+    pub fn add<C>(&mut self, config: C) -> Result<()>
    where
        C: Serialize,
    {
--- a/cli/loader/Cargo.toml
+++ b/cli/loader/Cargo.toml
@ -0,0 +1,38 @@
+[package]
+name = "tree-sitter-loader"
+description = "Locates, builds, and loads tree-sitter grammars at runtime"
+version = "0.20.0"
+authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
+edition = "2021"
+license = "MIT"
+readme = "README.md"
+keywords = ["incremental", "parsing"]
+categories = ["command-line-utilities", "parsing"]
+repository = "https://github.com/tree-sitter/tree-sitter"
+rust-version.workspace = true
+
+[features]
+wasm = ["tree-sitter/wasm"]
+
+[dependencies]
+anyhow.workspace = true
+cc.workspace = true
+dirs.workspace = true
+libloading.workspace = true
+once_cell.workspace = true
+regex.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+which.workspace = true
+
+[dependencies.tree-sitter]
+version = "0.20.10"
+path = "../../lib"
+
+[dependencies.tree-sitter-highlight]
+version = "0.20.2"
+path = "../../highlight"
+
+[dependencies.tree-sitter-tags]
+version = "0.20.2"
+path = "../../tags"
--- a/crates/loader/README.md
+++ b/crates/loader/README.md
--- a/cli/loader/build.rs
+++ b/cli/loader/build.rs
@ -0,0 +1,9 @@
+fn main() {
+    // Forward the `TARGET` environment variable to the crate as `BUILD_TARGET`.
+    let target = std::env::var("TARGET").unwrap();
+    println!("cargo:rustc-env=BUILD_TARGET={target}");
+
+    // Forward the contents of the `emscripten-version` file as `EMSCRIPTEN_VERSION`.
+    let version = std::fs::read_to_string("emscripten-version").unwrap();
+    println!("cargo:rustc-env=EMSCRIPTEN_VERSION={version}");
+}
--- a/cli/loader/emscripten-version
+++ b/cli/loader/emscripten-version
@ -0,0 +1 @@
+3.1.37
--- a/cli/loader/src/lib.rs
+++ b/cli/loader/src/lib.rs
--- a/crates/cli/npm/.gitignore
+++ b/crates/cli/npm/.gitignore
--- a/crates/cli/npm/cli.js
+++ b/crates/cli/npm/cli.js
--- a/crates/cli/npm/dsl.d.ts
+++ b/crates/cli/npm/dsl.d.ts
@ -10,7 +10,6 @@ type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
 type PrecRule = { type: 'PREC'; content: Rule; value: number };
 type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
 type RepeatRule = { type: 'REPEAT'; content: Rule };
-type ReservedRule = { type: 'RESERVED'; content: Rule; context_name: string };
 type SeqRule = { type: 'SEQ'; members: Rule[] };
 type StringRule = { type: 'STRING'; value: string };
 type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
@ -29,19 +28,12 @@ type Rule =
  | PrecRule
  | Repeat1Rule
  | RepeatRule
-  | ReservedRule
  | SeqRule
  | StringRule
  | SymbolRule<string>
  | TokenRule;

-declare class RustRegex {
-  value: string;
-
-  constructor(pattern: string);
-}
-
-type RuleOrLiteral = Rule | RegExp | RustRegex | string;
+type RuleOrLiteral = Rule | RegExp | string;

 type GrammarSymbols<RuleName extends string> = {
  [name in RuleName]: SymbolRule<name>;
@ -50,7 +42,7 @@ type GrammarSymbols<RuleName extends string> = {

 type RuleBuilder<RuleName extends string> = (
  $: GrammarSymbols<RuleName>,
-  previous?: Rule,
+  previous: Rule,
 ) => RuleOrLiteral;

 type RuleBuilders<
@ -113,7 +105,7 @@ interface Grammar<
   * @param $ grammar rules
   * @param previous array of externals from the base schema, if any
   *
-   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
+   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
   */
  externals?: (
    $: Record<string, SymbolRule<string>>,
@ -151,7 +143,7 @@ interface Grammar<
   *
   * @param $ grammar rules
   *
-   * @see https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
+   * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
   */
  supertypes?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
@ -164,20 +156,9 @@ interface Grammar<
   *
   * @param $ grammar rules
   *
-   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/3-writing-the-grammar#keyword-extraction
+   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
   */
  word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
-
-
-  /**
-   * Mapping of names to reserved word sets. The first reserved word set is the
-   * global word set, meaning it applies to every rule in every parse state.
-   * The other word sets can be used with the `reserved` function.
-   */
-  reserved?: Record<
-    string,
-    ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral[]
-  >;
 }

 type GrammarSchema<RuleName extends string> = {
@ -262,7 +243,7 @@ declare function optional(rule: RuleOrLiteral): ChoiceRule;
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
 declare const prec: {
-  (value: string | number, rule: RuleOrLiteral): PrecRule;
+  (value: String | number, rule: RuleOrLiteral): PrecRule;

  /**
   * Marks the given rule as left-associative (and optionally applies a
@ -278,7 +259,7 @@ declare const prec: {
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
-  left(value: string | number, rule: RuleOrLiteral): PrecLeftRule;
+  left(value: String | number, rule: RuleOrLiteral): PrecLeftRule;

  /**
   * Marks the given rule as right-associative (and optionally applies a
@ -294,7 +275,7 @@ declare const prec: {
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
-  right(value: string | number, rule: RuleOrLiteral): PrecRightRule;
+  right(value: String | number, rule: RuleOrLiteral): PrecRightRule;

  /**
   * Marks the given rule with a numerical precedence which will be used to
@ -311,7 +292,7 @@ declare const prec: {
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
-  dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule;
+  dynamic(value: String | number, rule: RuleOrLiteral): PrecDynamicRule;
 };

 /**
@ -331,15 +312,6 @@ declare function repeat(rule: RuleOrLiteral): RepeatRule;
 */
 declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;

-/**
- * Overrides the global reserved word set for a given rule. The word set name
- * should be defined in the `reserved` field in the grammar.
- *
- * @param wordset name of the reserved word set
- * @param rule rule that will use the reserved word set
- */
-declare function reserved(wordset: string, rule: RuleOrLiteral): ReservedRule;
-
 /**
 * Creates a rule that matches any number of other rules, one after another.
 * It is analogous to simply writing multiple symbols next to each other
@ -358,7 +330,7 @@ declare function sym<Name extends string>(name: Name): SymbolRule<Name>;

 /**
 * Marks the given rule as producing only a single token. Tree-sitter's
- * default is to treat each string or RegExp literal in the grammar as a
+ * default is to treat each String or RegExp literal in the grammar as a
 * separate token. Each token is matched separately by the lexer and
 * returned as its own leaf node in the tree. The token function allows
 * you to express a complex rule using the DSL functions (rather
--- a/crates/cli/npm/install.js
+++ b/crates/cli/npm/install.js
@ -6,8 +6,7 @@ const http = require('http');
 const https = require('https');
 const packageJSON = require('./package.json');

-https.globalAgent.keepAlive = false;
-
+// See the results table in https://github.com/tree-sitter/tree-sitter/issues/2196
 const matrix = {
  platform: {
    'darwin': {
@ -41,7 +40,7 @@ const matrix = {

 // Determine the URL of the file.
 const platform = matrix.platform[process.platform];
-const arch = platform?.arch[process.arch];
+const arch = platform && platform.arch[process.arch];

 if (!platform || !platform.name || !arch || !arch.name) {
  console.error(
@ -92,7 +91,7 @@ function get(url, callback) {
    }
  };

-  const proxyEnv = process.env.HTTPS_PROXY || process.env.https_proxy;
+  const proxyEnv = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
  if (!proxyEnv) {
    https.get(url, processResponse);
    return;
@ -102,23 +101,12 @@ function get(url, callback) {
  const requestPort = requestUrl.port || (requestUrl.protocol === 'https:' ? 443 : 80);
  const proxyUrl = new URL(proxyEnv);
  const request = proxyUrl.protocol === 'https:' ? https : http;
-  const requestOption = {
+  request.request({
    host: proxyUrl.hostname,
    port: proxyUrl.port || (proxyUrl.protocol === 'https:' ? 443 : 80),
    method: 'CONNECT',
    path: `${requestUrl.hostname}:${requestPort}`,
-  };
-  if (proxyUrl.username || proxyUrl.password) {
-    const auth = `${decodeURIComponent(
-      proxyUrl.username
-    )}:${decodeURIComponent(proxyUrl.password)}`;
-    requestOption.headers = {
-      'Proxy-Authorization': `Basic ${Buffer.from(
-        auth
-      ).toString('base64')}`,
-    }
-  }
-  request.request(requestOption).on('connect', (response, socket, _head) => {
+  }).on('connect', (response, socket, _head) => {
    if (response.statusCode !== 200) {
      // let caller handle error
      callback(response);
--- a/cli/npm/package.json
+++ b/cli/npm/package.json
@ -0,0 +1,24 @@
+{
+  "name": "tree-sitter-cli",
+  "version": "0.20.9",
+  "author": "Max Brunsfeld",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/tree-sitter/tree-sitter.git"
+  },
+  "description": "CLI for generating fast incremental parsers",
+  "keywords": [
+    "parser",
+    "lexer"
+  ],
+  "main": "lib/api/index.js",
+  "scripts": {
+    "install": "node install.js",
+    "prepack": "cp ../../LICENSE ../README.md .",
+    "postpack": "rm LICENSE README.md"
+  },
+  "bin": {
+    "tree-sitter": "cli.js"
+  }
+}
--- a/cli/src/generate/binding_files.rs
+++ b/cli/src/generate/binding_files.rs
@ -0,0 +1,153 @@
+use super::write_file;
+use anyhow::{Context, Result};
+use std::path::{Path, PathBuf};
+use std::{fs, str};
+
+const BINDING_CC_TEMPLATE: &str = include_str!("./templates/binding.cc");
+const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp");
+const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js");
+const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs");
+const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs");
+const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/cargo.toml");
+const PACKAGE_JSON_TEMPLATE: &str = include_str!("./templates/package.json");
+const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME";
+const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";
+const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
+const RUST_BINDING_VERSION: &str = env!("RUST_BINDING_VERSION");
+const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION";
+
+pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<()> {
+    // Generated files are only written when missing. The two manifests that may
+    // already exist (binding.gyp, package.json) are instead patched in place so
+    // that they point at the new `bindings/node` locations.
+    let bindings_root = repo_path.join("bindings");
+
+    // Package-style names (Cargo.toml, package.json) use dashes instead of underscores.
+    let dashed_name = language_name.replace('_', "-");
+
+    // Rust bindings: bindings/rust/{lib.rs,build.rs} plus a Cargo.toml at the repo root.
+    let rust_dir = bindings_root.join("rust");
+    create_path(&rust_dir, |dir| create_dir(dir))?;
+    let rust_sources = [("lib.rs", LIB_RS_TEMPLATE), ("build.rs", BUILD_RS_TEMPLATE)];
+    for (file_name, template) in rust_sources {
+        create_path(&rust_dir.join(file_name), |file| {
+            generate_file(file, template, language_name)
+        })?;
+    }
+
+    create_path(&repo_path.join("Cargo.toml"), |file| {
+        generate_file(file, CARGO_TOML_TEMPLATE, &dashed_name)
+    })?;
+
+    // Node bindings: bindings/node/{index.js,binding.cc}.
+    let node_dir = bindings_root.join("node");
+    create_path(&node_dir, |dir| create_dir(dir))?;
+    let node_sources = [
+        ("index.js", INDEX_JS_TEMPLATE),
+        ("binding.cc", BINDING_CC_TEMPLATE),
+    ];
+    for (file_name, template) in node_sources {
+        create_path(&node_dir.join(file_name), |file| {
+            generate_file(file, template, language_name)
+        })?;
+    }
+
+    // binding.gyp: generate it, or repoint an existing one at the new binding.cc path.
+    create_path_else(
+        &repo_path.join("binding.gyp"),
+        |file| generate_file(file, BINDING_GYP_TEMPLATE, language_name),
+        |file| {
+            let legacy_entry = "\"src/binding.cc\"";
+            let contents =
+                fs::read_to_string(file).with_context(|| "Failed to read binding.gyp")?;
+            if !contents.contains(legacy_entry) {
+                // Nothing refers to the old location, so leave the file untouched.
+                return Ok(());
+            }
+            eprintln!("Updating binding.gyp with new binding path");
+            let updated = contents.replace(legacy_entry, "\"bindings/node/binding.cc\"");
+            write_file(file, updated)
+        },
+    )?;
+
+    // package.json: generate it, or repoint `main` when it is absent or still
+    // names the old top-level index.js.
+    create_path_else(
+        &repo_path.join("package.json"),
+        |file| generate_file(file, PACKAGE_JSON_TEMPLATE, &dashed_name),
+        |file| {
+            let raw =
+                fs::read_to_string(file).with_context(|| "Failed to read package.json")?;
+            let mut manifest: serde_json::Map<String, serde_json::Value> =
+                serde_json::from_str(&raw).with_context(|| "Failed to parse package.json")?;
+
+            // A `main` that is present but not a string is left alone.
+            let main_is_stale = match manifest.get("main") {
+                None => true,
+                Some(main) => matches!(main.as_str(), Some("index.js" | "./index.js")),
+            };
+            if !main_is_stale {
+                return Ok(());
+            }
+
+            eprintln!("Updating package.json with new binding path");
+            let node_entry = serde_json::Value::String("bindings/node".to_string());
+            manifest.insert("main".to_string(), node_entry);
+
+            // Re-serialize with a trailing newline.
+            let mut pretty = serde_json::to_string_pretty(&manifest)?;
+            pretty.push('\n');
+            write_file(file, pretty)
+        },
+    )?;
+
+    // Best-effort removal of the files left behind at the old node binding paths;
+    // failures are deliberately ignored.
+    let stale_files = [repo_path.join("index.js"), repo_path.join("src").join("binding.cc")];
+    for stale in stale_files {
+        if stale.exists() {
+            fs::remove_file(stale).ok();
+        }
+    }
+
+    Ok(())
+}
+
+fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()> {
+    // Substitute the placeholders in a fixed order: parser name first, then the
+    // CLI version, then the Rust binding version; the result is written to `path`.
+    let rendered = template
+        .replace(PARSER_NAME_PLACEHOLDER, language_name)
+        .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION)
+        .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION);
+    write_file(path, rendered)
+}
+
+fn create_dir(path: &Path) -> Result<()> {
+    let describe = || format!("Failed to create {:?}", path.to_string_lossy()); // error context
+    fs::create_dir_all(path).with_context(describe)
+}
+
+fn create_path<F>(path: &PathBuf, action: F) -> Result<bool>
+where
+    F: Fn(&PathBuf) -> Result<()>,
+{
+    let missing = !path.exists(); // `action` runs only when nothing exists at `path`
+    if missing {
+        action(path)?;
+    }
+    Ok(missing)
+}
+
+fn create_path_else<T, F>(path: &PathBuf, action: T, else_action: F) -> Result<bool>
+where
+    T: Fn(&PathBuf) -> Result<()>,
+    F: Fn(&PathBuf) -> Result<()>,
+{
+    let missing = !path.exists(); // exactly one callback runs; `true` means it was `action`
+    match missing {
+        true => action(path)?,
+        false => else_action(path)?,
+    }
+    Ok(missing)
+}
--- a/crates/generate/src/build_tables/build_lex_table.rs
+++ b/crates/generate/src/build_tables/build_lex_table.rs
@ -1,26 +1,14 @@
-use std::{
-    collections::{hash_map::Entry, HashMap, VecDeque},
-    mem,
-};
-
-use log::debug;
-
-use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
-use crate::{
-    dedup::split_state_id_groups,
-    grammars::{LexicalGrammar, SyntaxGrammar},
-    nfa::{CharacterSet, NfaCursor},
-    rules::{Symbol, TokenSet},
-    tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
-};
-
-pub const LARGE_CHARACTER_RANGE_COUNT: usize = 8;
-
-pub struct LexTables {
-    pub main_lex_table: LexTable,
-    pub keyword_lex_table: LexTable,
-    pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
-}
+use super::coincident_tokens::CoincidentTokenIndex;
+use super::token_conflicts::TokenConflictMap;
+use crate::generate::dedup::split_state_id_groups;
+use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
+use crate::generate::nfa::NfaCursor;
+use crate::generate::rules::{Symbol, TokenSet};
+use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
+use log::info;
+use std::collections::hash_map::Entry;
+use std::collections::{HashMap, VecDeque};
+use std::mem;

 pub fn build_lex_table(
    parse_table: &mut ParseTable,
@ -29,7 +17,7 @@ pub fn build_lex_table(
    keywords: &TokenSet,
    coincident_token_index: &CoincidentTokenIndex,
    token_conflict_map: &TokenConflictMap,
-) -> LexTables {
+) -> (LexTable, LexTable) {
    let keyword_lex_table = if syntax_grammar.word_token.is_some() {
        let mut builder = LexTableBuilder::new(lexical_grammar);
        builder.add_state_for_tokens(keywords);
@ -38,22 +26,20 @@ pub fn build_lex_table(
        LexTable::default()
    };

-    let mut parse_state_ids_by_token_set = Vec::<(TokenSet, Vec<ParseStateId>)>::new();
+    let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec<ParseStateId>)> = Vec::new();
    for (i, state) in parse_table.states.iter().enumerate() {
        let tokens = state
            .terminal_entries
            .keys()
-            .copied()
-            .chain(state.reserved_words.iter())
            .filter_map(|token| {
                if token.is_terminal() {
-                    if keywords.contains(&token) {
+                    if keywords.contains(token) {
                        syntax_grammar.word_token
                    } else {
-                        Some(token)
+                        Some(*token)
                    }
                } else if token.is_eof() {
-                    Some(token)
+                    Some(*token)
                } else {
                    None
                }
@ -88,45 +74,10 @@ pub fn build_lex_table(
        }
    }

-    let mut main_lex_table = mem::take(&mut builder.table);
-    minimize_lex_table(&mut main_lex_table, parse_table);
-    sort_states(&mut main_lex_table, parse_table);
-
-    let mut large_character_sets = Vec::new();
-    for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() {
-        let symbol = Symbol::terminal(variable_ix);
-        builder.reset();
-        builder.add_state_for_tokens(&TokenSet::from_iter([symbol]));
-        for state in &builder.table.states {
-            let mut characters = CharacterSet::empty();
-            for (chars, action) in &state.advance_actions {
-                if action.in_main_token {
-                    characters = characters.add(chars);
-                    continue;
-                }
-
-                if chars.range_count() > LARGE_CHARACTER_RANGE_COUNT
-                    && !large_character_sets.iter().any(|(_, set)| set == chars)
-                {
-                    large_character_sets.push((None, chars.clone()));
-                }
-            }
-
-            if characters.range_count() > LARGE_CHARACTER_RANGE_COUNT
-                && !large_character_sets
-                    .iter()
-                    .any(|(_, set)| *set == characters)
-            {
-                large_character_sets.push((Some(symbol), characters));
-            }
-        }
-    }
-
-    LexTables {
-        main_lex_table,
-        keyword_lex_table,
-        large_character_sets,
-    }
+    let mut table = builder.table;
+    minimize_lex_table(&mut table, parse_table);
+    sort_states(&mut table, parse_table);
+    (table, keyword_lex_table)
 }

 struct QueueEntry {
@ -154,12 +105,6 @@ impl<'a> LexTableBuilder<'a> {
        }
    }

-    fn reset(&mut self) {
-        self.table = LexTable::default();
-        self.state_queue.clear();
-        self.state_ids_by_nfa_state_set.clear();
-    }
-
    fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize {
        let mut eof_valid = false;
        let nfa_states = tokens
@ -176,8 +121,9 @@ impl<'a> LexTableBuilder<'a> {
        let (state_id, is_new) = self.add_state(nfa_states, eof_valid);

        if is_new {
-            debug!(
-                "entry point state: {state_id}, tokens: {:?}",
+            info!(
+                "entry point state: {}, tokens: {:?}",
+                state_id,
                tokens
                    .iter()
                    .map(|t| &self.lexical_grammar.variables[t.index].name)
@ -358,7 +304,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
        &mut group_ids_by_state_id,
        1,
        lex_states_differ,
-    ) {}
+    ) {
+        continue;
+    }

    let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
    for state_ids in &state_ids_by_group_id {
--- a/crates/generate/src/build_tables/build_parse_table.rs
+++ b/crates/generate/src/build_tables/build_parse_table.rs
@ -1,28 +1,24 @@
-use std::{
-    cmp::Ordering,
-    collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
-    hash::BuildHasherDefault,
+use super::item::{ParseItem, ParseItemSet, ParseItemSetCore};
+use super::item_set_builder::ParseItemSetBuilder;
+use crate::generate::grammars::PrecedenceEntry;
+use crate::generate::grammars::{
+    InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
 };
+use crate::generate::node_types::VariableInfo;
+use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet};
+use crate::generate::tables::{
+    FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
+    ProductionInfo, ProductionInfoId,
+};
+use anyhow::{anyhow, Result};
+use std::cmp::Ordering;
+use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
+use std::fmt::Write;
+use std::hash::BuildHasherDefault;
+use std::u32;

 use indexmap::{map::Entry, IndexMap};
-use log::warn;
 use rustc_hash::FxHasher;
-use serde::Serialize;
-use thiserror::Error;
-
-use super::{
-    item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
-    item_set_builder::ParseItemSetBuilder,
-};
-use crate::{
-    grammars::{LexicalGrammar, PrecedenceEntry, ReservedWordSetId, SyntaxGrammar, VariableType},
-    node_types::VariableInfo,
-    rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
-    tables::{
-        FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable,
-        ParseTableEntry, ProductionInfo, ProductionInfoId,
-    },
-};

 // For conflict reporting, each parse state is associated with an example
 // sequence of symbols that could lead to that parse state.
@ -31,7 +27,7 @@ type SymbolSequence = Vec<Symbol>;
 type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
 pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);

-#[derive(Clone, PartialEq)]
+#[derive(Clone)]
 struct AuxiliarySymbolInfo {
    auxiliary_symbol: Symbol,
    parent_symbols: Vec<Symbol>,
@ -65,208 +61,8 @@ struct ParseTableBuilder<'a> {
    parse_table: ParseTable,
 }

-pub type BuildTableResult<T> = Result<T, ParseTableBuilderError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ParseTableBuilderError {
-    #[error("Unresolved conflict for symbol sequence:\n\n{0}")]
-    Conflict(#[from] ConflictError),
-    #[error("Extra rules must have unambiguous endings. Conflicting rules: {0}")]
-    AmbiguousExtra(#[from] AmbiguousExtraError),
-    #[error(
-        "The non-terminal rule `{0}` is used in a non-terminal `extra` rule, which is not allowed."
-    )]
-    ImproperNonTerminalExtra(String),
-    #[error("State count `{0}` exceeds the max value {max}.", max=u16::MAX)]
-    StateCount(usize),
-}
-
-#[derive(Default, Debug, Serialize, Error)]
-pub struct ConflictError {
-    pub symbol_sequence: Vec<String>,
-    pub conflicting_lookahead: String,
-    pub possible_interpretations: Vec<Interpretation>,
-    pub possible_resolutions: Vec<Resolution>,
-}
-
-#[derive(Default, Debug, Serialize, Error)]
-pub struct Interpretation {
-    pub preceding_symbols: Vec<String>,
-    pub variable_name: String,
-    pub production_step_symbols: Vec<String>,
-    pub step_index: u32,
-    pub done: bool,
-    pub conflicting_lookahead: String,
-    pub precedence: Option<String>,
-    pub associativity: Option<String>,
-}
-
-#[derive(Debug, Serialize)]
-pub enum Resolution {
-    Precedence { symbols: Vec<String> },
-    Associativity { symbols: Vec<String> },
-    AddConflict { symbols: Vec<String> },
-}
-
-#[derive(Debug, Serialize, Error)]
-pub struct AmbiguousExtraError {
-    pub parent_symbols: Vec<String>,
-}
-
-impl std::fmt::Display for ConflictError {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        for symbol in &self.symbol_sequence {
-            write!(f, "  {symbol}")?;
-        }
-        writeln!(f, "  •  {}  …\n", self.conflicting_lookahead)?;
-
-        writeln!(f, "Possible interpretations:\n")?;
-        let mut interpretations = self
-            .possible_interpretations
-            .iter()
-            .map(|i| {
-                let line = i.to_string();
-                let prec_line = if let (Some(precedence), Some(associativity)) =
-                    (&i.precedence, &i.associativity)
-                {
-                    Some(format!(
-                        "(precedence: {precedence}, associativity: {associativity})",
-                    ))
-                } else {
-                    i.precedence
-                        .as_ref()
-                        .map(|precedence| format!("(precedence: {precedence})"))
-                };
-
-                (line, prec_line)
-            })
-            .collect::<Vec<_>>();
-        let max_interpretation_length = interpretations
-            .iter()
-            .map(|i| i.0.chars().count())
-            .max()
-            .unwrap();
-        interpretations.sort_unstable();
-        for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
-            write!(f, "  {}:", i + 1).unwrap();
-            write!(f, "{line}")?;
-            if let Some(prec_suffix) = prec_suffix {
-                write!(
-                    f,
-                    "{:1$}",
-                    "",
-                    max_interpretation_length.saturating_sub(line.chars().count()) + 2
-                )?;
-                write!(f, "{prec_suffix}")?;
-            }
-            writeln!(f)?;
-        }
-
-        writeln!(f, "\nPossible resolutions:\n")?;
-        for (i, resolution) in self.possible_resolutions.iter().enumerate() {
-            writeln!(f, "  {}:  {resolution}", i + 1)?;
-        }
-        Ok(())
-    }
-}
-
-impl std::fmt::Display for Interpretation {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        for symbol in &self.preceding_symbols {
-            write!(f, "  {symbol}")?;
-        }
-        write!(f, "  ({}", self.variable_name)?;
-        for (i, symbol) in self.production_step_symbols.iter().enumerate() {
-            if i == self.step_index as usize {
-                write!(f, "  •")?;
-            }
-            write!(f, "  {symbol}")?;
-        }
-        write!(f, ")")?;
-        if self.done {
-            write!(f, "  •  {}  …", self.conflicting_lookahead)?;
-        }
-        Ok(())
-    }
-}
-
-impl std::fmt::Display for Resolution {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match self {
-            Self::Precedence { symbols } => {
-                write!(f, "Specify a higher precedence in ")?;
-                for (i, symbol) in symbols.iter().enumerate() {
-                    if i > 0 {
-                        write!(f, " and ")?;
-                    }
-                    write!(f, "`{symbol}`")?;
-                }
-                write!(f, " than in the other rules.")?;
-            }
-            Self::Associativity { symbols } => {
-                write!(f, "Specify a left or right associativity in ")?;
-                for (i, symbol) in symbols.iter().enumerate() {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    write!(f, "`{symbol}`")?;
-                }
-            }
-            Self::AddConflict { symbols } => {
-                write!(f, "Add a conflict for these rules: ")?;
-                for (i, symbol) in symbols.iter().enumerate() {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    write!(f, "`{symbol}`")?;
-                }
-            }
-        }
-        Ok(())
-    }
-}
-
-impl std::fmt::Display for AmbiguousExtraError {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        for (i, symbol) in self.parent_symbols.iter().enumerate() {
-            if i > 0 {
-                write!(f, ", ")?;
-            }
-            write!(f, "{symbol}")?;
-        }
-        Ok(())
-    }
-}
-
 impl<'a> ParseTableBuilder<'a> {
-    fn new(
-        syntax_grammar: &'a SyntaxGrammar,
-        lexical_grammar: &'a LexicalGrammar,
-        item_set_builder: ParseItemSetBuilder<'a>,
-        variable_info: &'a [VariableInfo],
-    ) -> Self {
-        Self {
-            syntax_grammar,
-            lexical_grammar,
-            item_set_builder,
-            variable_info,
-            non_terminal_extra_states: Vec::new(),
-            state_ids_by_item_set: IndexMap::default(),
-            core_ids_by_core: HashMap::new(),
-            parse_state_info_by_id: Vec::new(),
-            parse_state_queue: VecDeque::new(),
-            actual_conflicts: syntax_grammar.expected_conflicts.iter().cloned().collect(),
-            parse_table: ParseTable {
-                states: Vec::new(),
-                symbols: Vec::new(),
-                external_lex_states: Vec::new(),
-                production_infos: Vec::new(),
-                max_aliased_production_length: 1,
-            },
-        }
-    }
-
-    fn build(mut self) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
+    fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
        // Ensure that the empty alias sequence has index 0.
        self.parse_table
            .production_infos
@ -279,13 +75,10 @@ impl<'a> ParseTableBuilder<'a> {
        self.add_parse_state(
            &Vec::new(),
            &Vec::new(),
-            ParseItemSet {
-                entries: vec![ParseItemSetEntry {
-                    item: ParseItem::start(),
-                    lookaheads: std::iter::once(Symbol::end()).collect(),
-                    following_reserved_word_set: ReservedWordSetId::default(),
-                }],
-            },
+            ParseItemSet::with(std::iter::once((
+                ParseItem::start(),
+                std::iter::once(&Symbol::end()).copied().collect(),
+            ))),
        );

        // Compute the possible item sets for non-terminal extras.
@ -301,35 +94,25 @@ impl<'a> ParseTableBuilder<'a> {
                non_terminal_extra_item_sets_by_first_terminal
                    .entry(production.first_symbol().unwrap())
                    .or_insert_with(ParseItemSet::default)
-                    .insert(ParseItem {
-                        variable_index: extra_non_terminal.index as u32,
-                        production,
-                        step_index: 1,
-                        has_preceding_inherited_fields: false,
-                    })
-                    .lookaheads
-                    .insert(Symbol::end_of_nonterminal_extra());
+                    .insert(
+                        ParseItem {
+                            variable_index: extra_non_terminal.index as u32,
+                            production,
+                            step_index: 1,
+                            has_preceding_inherited_fields: false,
+                        },
+                        &std::iter::once(&Symbol::end_of_nonterminal_extra())
+                            .copied()
+                            .collect(),
+                    );
            }
        }

-        let non_terminal_sets_len = non_terminal_extra_item_sets_by_first_terminal.len();
-        self.non_terminal_extra_states
-            .reserve(non_terminal_sets_len);
-        self.parse_state_info_by_id.reserve(non_terminal_sets_len);
-        self.parse_table.states.reserve(non_terminal_sets_len);
-        self.parse_state_queue.reserve(non_terminal_sets_len);
        // Add a state for each starting terminal of a non-terminal extra rule.
        for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
-            if terminal.is_non_terminal() {
-                Err(ParseTableBuilderError::ImproperNonTerminalExtra(
-                    self.symbol_name(&terminal),
-                ))?;
-            }
-
-            // Add the parse state, and *then* push the terminal and the state id into the
-            // list of nonterminal extra states
-            let state_id = self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
-            self.non_terminal_extra_states.push((terminal, state_id));
+            self.non_terminal_extra_states
+                .push((terminal, self.parse_table.states.len()));
+            self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
        }

        while let Some(entry) = self.parse_state_queue.pop_front() {
@ -346,21 +129,17 @@ impl<'a> ParseTableBuilder<'a> {
        }

        if !self.actual_conflicts.is_empty() {
-            warn!(
-                "unnecessary conflicts:\n  {}",
-                &self
-                    .actual_conflicts
-                    .iter()
-                    .map(|conflict| {
-                        conflict
-                            .iter()
-                            .map(|symbol| format!("`{}`", self.symbol_name(symbol)))
-                            .collect::<Vec<_>>()
-                            .join(", ")
-                    })
-                    .collect::<Vec<_>>()
-                    .join("\n  ")
-            );
+            println!("Warning: unnecessary conflicts");
+            for conflict in &self.actual_conflicts {
+                println!(
+                    "  {}",
+                    conflict
+                        .iter()
+                        .map(|symbol| format!("`{}`", self.symbol_name(symbol)))
+                        .collect::<Vec<_>>()
+                        .join(", ")
+                );
+            }
        }

        Ok((self.parse_table, self.parse_state_info_by_id))
@ -394,7 +173,6 @@ impl<'a> ParseTableBuilder<'a> {
                    external_lex_state_id: 0,
                    terminal_entries: IndexMap::default(),
                    nonterminal_entries: IndexMap::default(),
-                    reserved_words: TokenSet::default(),
                    core_id,
                });
                self.parse_state_queue.push_back(ParseStateQueueEntry {
@ -410,10 +188,10 @@ impl<'a> ParseTableBuilder<'a> {
    fn add_actions(
        &mut self,
        mut preceding_symbols: SymbolSequence,
-        mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
+        mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
        state_id: ParseStateId,
        item_set: &ParseItemSet<'a>,
-    ) -> BuildTableResult<()> {
+    ) -> Result<()> {
        let mut terminal_successors = BTreeMap::new();
        let mut non_terminal_successors = BTreeMap::new();
        let mut lookaheads_with_conflicts = TokenSet::new();
@ -421,18 +199,13 @@ impl<'a> ParseTableBuilder<'a> {

        // Each item in the item set contributes to either a Shift action or a Reduce
        // action in this state.
-        for ParseItemSetEntry {
-            item,
-            lookaheads,
-            following_reserved_word_set: reserved_lookaheads,
-        } in &item_set.entries
-        {
+        for (item, lookaheads) in &item_set.entries {
            // If the item is unfinished, then this state has a transition for the item's
            // next symbol. Advance the item to its next step and insert the resulting
            // item into the successor item set.
            if let Some(next_symbol) = item.symbol() {
                let mut successor = item.successor();
-                let successor_set = if next_symbol.is_non_terminal() {
+                if next_symbol.is_non_terminal() {
                    let variable = &self.syntax_grammar.variables[next_symbol.index];

                    // Keep track of where auxiliary non-terminals (repeat symbols) are
@ -461,16 +234,13 @@ impl<'a> ParseTableBuilder<'a> {
                    non_terminal_successors
                        .entry(next_symbol)
                        .or_insert_with(ParseItemSet::default)
+                        .insert(successor, lookaheads);
                } else {
                    terminal_successors
                        .entry(next_symbol)
                        .or_insert_with(ParseItemSet::default)
-                };
-                let successor_entry = successor_set.insert(successor);
-                successor_entry.lookaheads.insert_all(lookaheads);
-                successor_entry.following_reserved_word_set = successor_entry
-                    .following_reserved_word_set
-                    .max(*reserved_lookaheads);
+                        .insert(successor, lookaheads);
+                }
            }
            // If the item is finished, then add a Reduce action to this state based
            // on this item.
@ -523,7 +293,7 @@ impl<'a> ParseTableBuilder<'a> {
                        }
                    }

-                    reduction_info.precedence.clone_from(precedence);
+                    reduction_info.precedence = precedence.clone();
                    if let Err(i) = reduction_info.symbols.binary_search(&symbol) {
                        reduction_info.symbols.insert(i, symbol);
                    }
@ -536,9 +306,7 @@ impl<'a> ParseTableBuilder<'a> {
            }
        }

-        preceding_auxiliary_symbols.dedup();
-
-        // Having computed the successor item sets for each symbol, add a new
+        // Having computed the successor item sets for each symbol, add a new
        // parse state for each of these item sets, and add a corresponding Shift
        // action to this state.
        for (symbol, next_item_set) in terminal_successors {
@ -597,7 +365,7 @@ impl<'a> ParseTableBuilder<'a> {
            )?;
        }

-        // Add actions for the grammar's `extra` symbols.
+        // Finally, add actions for the grammar's `extra` symbols.
        let state = &mut self.parse_table.states[state_id];
        let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();

@ -609,7 +377,7 @@ impl<'a> ParseTableBuilder<'a> {
                let parent_symbols = item_set
                    .entries
                    .iter()
-                    .filter_map(|ParseItemSetEntry { item, .. }| {
+                    .filter_map(|(item, _)| {
                        if !item.is_augmented() && item.step_index > 0 {
                            Some(item.variable_index)
                        } else {
@ -617,18 +385,15 @@ impl<'a> ParseTableBuilder<'a> {
                        }
                    })
                    .collect::<HashSet<_>>();
-                let parent_symbol_names = parent_symbols
-                    .iter()
-                    .map(|&variable_index| {
-                        self.syntax_grammar.variables[variable_index as usize]
-                            .name
-                            .clone()
-                    })
-                    .collect::<Vec<_>>();
-
-                Err(AmbiguousExtraError {
-                    parent_symbols: parent_symbol_names,
-                })?;
+                let mut message =
+                    "Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
+                for (i, variable_index) in parent_symbols.iter().enumerate() {
+                    if i > 0 {
+                        message += ", ";
+                    }
+                    message += &self.syntax_grammar.variables[*variable_index as usize].name;
+                }
+                return Err(anyhow!(message));
            }
        }
        // Add actions for the start tokens of each non-terminal extra rule.
@ -666,30 +431,6 @@ impl<'a> ParseTableBuilder<'a> {
            }
        }

-        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
-            let reserved_word_set_id = item_set
-                .entries
-                .iter()
-                .filter_map(|entry| {
-                    if let Some(next_step) = entry.item.step() {
-                        if next_step.symbol == keyword_capture_token {
-                            Some(next_step.reserved_word_set_id)
-                        } else {
-                            None
-                        }
-                    } else if entry.lookaheads.contains(&keyword_capture_token) {
-                        Some(entry.following_reserved_word_set)
-                    } else {
-                        None
-                    }
-                })
-                .max();
-            if let Some(reserved_word_set_id) = reserved_word_set_id {
-                state.reserved_words =
-                    self.syntax_grammar.reserved_word_sets[reserved_word_set_id.0].clone();
-            }
-        }
-
        Ok(())
    }

@ -701,7 +442,7 @@ impl<'a> ParseTableBuilder<'a> {
        preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
        conflicting_lookahead: Symbol,
        reduction_info: &ReductionInfo,
-    ) -> BuildTableResult<()> {
+    ) -> Result<()> {
        let entry = self.parse_table.states[state_id]
            .terminal_entries
            .get_mut(&conflicting_lookahead)
@ -714,12 +455,9 @@ impl<'a> ParseTableBuilder<'a> {
        // REDUCE-REDUCE conflicts where all actions have the *same*
        // precedence, and there can still be SHIFT/REDUCE conflicts.
        let mut considered_associativity = false;
-        let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
-        let mut conflicting_items = BTreeSet::new();
-        for ParseItemSetEntry {
-            item, lookaheads, ..
-        } in &item_set.entries
-        {
+        let mut shift_precedence: Vec<(&Precedence, Symbol)> = Vec::new();
+        let mut conflicting_items = HashSet::new();
+        for (item, lookaheads) in &item_set.entries {
            if let Some(step) = item.step() {
                if item.step_index > 0
                    && self
@ -856,55 +594,93 @@ impl<'a> ParseTableBuilder<'a> {
            return Ok(());
        }

-        let mut conflict_error = ConflictError::default();
+        let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
        for symbol in preceding_symbols {
-            conflict_error
-                .symbol_sequence
-                .push(self.symbol_name(symbol));
+            write!(&mut msg, "  {}", self.symbol_name(symbol)).unwrap();
        }
-        conflict_error.conflicting_lookahead = self.symbol_name(&conflicting_lookahead);

-        let interpretations = conflicting_items
+        write!(
+            &mut msg,
+            "  •  {}  …\n\n",
+            self.symbol_name(&conflicting_lookahead)
+        )
+        .unwrap();
+        write!(&mut msg, "Possible interpretations:\n\n").unwrap();
+
+        let mut interpretations = conflicting_items
            .iter()
            .map(|item| {
-                let preceding_symbols = preceding_symbols
+                let mut line = String::new();
+                for preceding_symbol in preceding_symbols
                    .iter()
                    .take(preceding_symbols.len() - item.step_index as usize)
-                    .map(|symbol| self.symbol_name(symbol))
-                    .collect::<Vec<_>>();
+                {
+                    write!(&mut line, "  {}", self.symbol_name(preceding_symbol)).unwrap();
+                }

-                let variable_name = self.syntax_grammar.variables[item.variable_index as usize]
-                    .name
-                    .clone();
+                write!(
+                    &mut line,
+                    "  ({}",
+                    &self.syntax_grammar.variables[item.variable_index as usize].name
+                )
+                .unwrap();

-                let production_step_symbols = item
-                    .production
-                    .steps
-                    .iter()
-                    .map(|step| self.symbol_name(&step.symbol))
-                    .collect::<Vec<_>>();
+                for (j, step) in item.production.steps.iter().enumerate() {
+                    if j as u32 == item.step_index {
+                        write!(&mut line, "  •").unwrap();
+                    }
+                    write!(&mut line, "  {}", self.symbol_name(&step.symbol)).unwrap();
+                }

-                let precedence = match item.precedence() {
-                    Precedence::None => None,
-                    _ => Some(item.precedence().to_string()),
+                write!(&mut line, ")").unwrap();
+
+                if item.is_done() {
+                    write!(
+                        &mut line,
+                        "  •  {}  …",
+                        self.symbol_name(&conflicting_lookahead)
+                    )
+                    .unwrap();
+                }
+
+                let precedence = item.precedence();
+                let associativity = item.associativity();
+
+                let prec_line = if let Some(associativity) = associativity {
+                    Some(format!(
+                        "(precedence: {precedence}, associativity: {associativity:?})",
+                    ))
+                } else if !precedence.is_none() {
+                    Some(format!("(precedence: {precedence})"))
+                } else {
+                    None
                };

-                let associativity = item.associativity().map(|assoc| format!("{assoc:?}"));
-
-                Interpretation {
-                    preceding_symbols,
-                    variable_name,
-                    production_step_symbols,
-                    step_index: item.step_index,
-                    done: item.is_done(),
-                    conflicting_lookahead: self.symbol_name(&conflicting_lookahead),
-                    precedence,
-                    associativity,
-                }
+                (line, prec_line)
            })
            .collect::<Vec<_>>();
-        conflict_error.possible_interpretations = interpretations;

+        let max_interpretation_length = interpretations
+            .iter()
+            .map(|i| i.0.chars().count())
+            .max()
+            .unwrap();
+        interpretations.sort_unstable();
+        for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
+            write!(&mut msg, "  {}:", i + 1).unwrap();
+            msg += &line;
+            if let Some(prec_suffix) = prec_suffix {
+                for _ in line.chars().count()..max_interpretation_length {
+                    msg.push(' ');
+                }
+                msg += "  ";
+                msg += &prec_suffix;
+            }
+            msg.push('\n');
+        }
+
+        let mut resolution_count = 0;
+        write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
        let mut shift_items = Vec::new();
        let mut reduce_items = Vec::new();
        for item in conflicting_items {
@ -917,57 +693,76 @@ impl<'a> ParseTableBuilder<'a> {
        shift_items.sort_unstable();
        reduce_items.sort_unstable();

-        let get_rule_names = |items: &[&ParseItem]| -> Vec<String> {
+        let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
            let mut last_rule_id = None;
-            let mut result = Vec::with_capacity(items.len());
            for item in items {
                if last_rule_id == Some(item.variable_index) {
                    continue;
                }
-                last_rule_id = Some(item.variable_index);
-                result.push(self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)));
-            }

-            result
+                if last_rule_id.is_some() {
+                    write!(&mut msg, " and").unwrap();
+                }
+
+                last_rule_id = Some(item.variable_index);
+                write!(
+                    msg,
+                    " `{}`",
+                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
+                )
+                .unwrap();
+            }
        };

        if actual_conflict.len() > 1 {
            if !shift_items.is_empty() {
-                let names = get_rule_names(&shift_items);
-                conflict_error
-                    .possible_resolutions
-                    .push(Resolution::Precedence { symbols: names });
+                resolution_count += 1;
+                write!(
+                    &mut msg,
+                    "  {resolution_count}:  Specify a higher precedence in",
+                )
+                .unwrap();
+                list_rule_names(&mut msg, &shift_items);
+                writeln!(&mut msg, " than in the other rules.").unwrap();
            }

            for item in &reduce_items {
-                let name = self.symbol_name(&Symbol::non_terminal(item.variable_index as usize));
-                conflict_error
-                    .possible_resolutions
-                    .push(Resolution::Precedence {
-                        symbols: vec![name],
-                    });
+                resolution_count += 1;
+                writeln!(
+                    &mut msg,
+                    "  {resolution_count}:  Specify a higher precedence in `{}` than in the other rules.",
+                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
+                )
+                .unwrap();
            }
        }

        if considered_associativity {
-            let names = get_rule_names(&reduce_items);
-            conflict_error
-                .possible_resolutions
-                .push(Resolution::Associativity { symbols: names });
+            resolution_count += 1;
+            write!(
+                &mut msg,
+                "  {resolution_count}:  Specify a left or right associativity in",
+            )
+            .unwrap();
+            list_rule_names(&mut msg, &reduce_items);
+            writeln!(&mut msg).unwrap();
        }

-        conflict_error
-            .possible_resolutions
-            .push(Resolution::AddConflict {
-                symbols: actual_conflict
-                    .iter()
-                    .map(|s| self.symbol_name(s))
-                    .collect(),
-            });
+        resolution_count += 1;
+        write!(
+            &mut msg,
+            "  {resolution_count}:  Add a conflict for these rules: ",
+        )
+        .unwrap();
+        for (i, symbol) in actual_conflict.iter().enumerate() {
+            if i > 0 {
+                write!(&mut msg, ", ").unwrap();
+            }
+            write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
+        }
+        writeln!(&mut msg).unwrap();

-        self.actual_conflicts.insert(actual_conflict);
-
-        Err(conflict_error)?
+        Err(anyhow!(msg))
    }

    fn compare_precedence(
@ -1036,7 +831,7 @@ impl<'a> ParseTableBuilder<'a> {
        let parent_symbols = item_set
            .entries
            .iter()
-            .filter_map(|ParseItemSetEntry { item, .. }| {
+            .filter_map(|(item, _)| {
                let variable_index = item.variable_index as usize;
                if item.symbol() == Some(symbol)
                    && !self.syntax_grammar.variables[variable_index].is_auxiliary()
@ -1124,24 +919,84 @@ impl<'a> ParseTableBuilder<'a> {
                if variable.kind == VariableType::Named {
                    variable.name.clone()
                } else {
-                    format!("'{}'", variable.name)
+                    format!("'{}'", &variable.name)
                }
            }
        }
    }
 }

+/// Fills `result` (one `TokenSet` per terminal index) with the tokens recorded
+/// as following each terminal.
+///
+/// For every pair of adjacent steps in each production of `grammar` and of
+/// `inlines`, every terminal in `builder.last_set` of the left step's symbol
+/// receives all terminals from `builder.first_set` of the right step's symbol.
+/// Afterwards, each terminal in `grammar.extra_symbols` is inserted into every
+/// entry, and that extra's own entry is replaced by the set of all terminals
+/// with indices `0..result.len()`.
+fn populate_following_tokens(
+    result: &mut [TokenSet],
+    grammar: &SyntaxGrammar,
+    inlines: &InlinedProductionMap,
+    builder: &ParseItemSetBuilder,
+) {
+    // Every terminal whose index addresses a slot of `result`.
+    let every_terminal: TokenSet = (0..result.len()).map(Symbol::terminal).collect();
+
+    let grammar_productions = grammar
+        .variables
+        .iter()
+        .flat_map(|variable| &variable.productions);
+    for production in grammar_productions.chain(&inlines.productions) {
+        // Visit each pair of neighbouring steps: `pair[0]` is the left step,
+        // `pair[1]` the right one. Productions with fewer than two steps
+        // yield no pairs.
+        for pair in production.steps.windows(2) {
+            let trailing = builder.last_set(&pair[0].symbol);
+            let leading = builder.first_set(&pair[1].symbol);
+            for token in trailing.iter() {
+                if !token.is_terminal() {
+                    continue;
+                }
+                result[token.index].insert_all_terminals(leading);
+            }
+        }
+    }
+
+    for extra in &grammar.extra_symbols {
+        if !extra.is_terminal() {
+            continue;
+        }
+        // A terminal extra is added to every entry ...
+        for following in result.iter_mut() {
+            following.insert(*extra);
+        }
+        // ... and its own entry becomes the full terminal set.
+        result[extra.index] = every_terminal.clone();
+    }
+}
+
+/// Entry point for parse-table construction.
+///
+/// Creates a `ParseItemSetBuilder` from the two grammars and the inlined
+/// productions, fills one `TokenSet` per lexical variable through
+/// `populate_following_tokens`, then assembles a `ParseTableBuilder` and
+/// runs its `build` method.
+///
+/// Returns the parse table, the following-token sets, and the
+/// `ParseStateInfo` list produced by `build`; an error from `build` is
+/// propagated to the caller.
 pub fn build_parse_table<'a>(
     syntax_grammar: &'a SyntaxGrammar,
     lexical_grammar: &'a LexicalGrammar,
-    item_set_builder: ParseItemSetBuilder<'a>,
+    inlines: &'a InlinedProductionMap,
     variable_info: &'a [VariableInfo],
-) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
-    ParseTableBuilder::new(
+) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
+    // The builder's conflict set starts out as the grammar's expected conflicts.
+    let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
+    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
+    // One (initially empty) set per variable of the lexical grammar.
+    let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
+    populate_following_tokens(
+        &mut following_tokens,
+        syntax_grammar,
+        inlines,
+        &item_set_builder,
+    );
+
+    let (table, item_sets) = ParseTableBuilder {
         syntax_grammar,
         lexical_grammar,
         item_set_builder,
         variable_info,
-    )
-    .build()
+        non_terminal_extra_states: Vec::new(),
+        actual_conflicts,
+        state_ids_by_item_set: IndexMap::default(),
+        core_ids_by_core: HashMap::new(),
+        parse_state_info_by_id: Vec::new(),
+        parse_state_queue: VecDeque::new(),
+        parse_table: ParseTable {
+            states: Vec::new(),
+            symbols: Vec::new(),
+            external_lex_states: Vec::new(),
+            production_infos: Vec::new(),
+            max_aliased_production_length: 1,
+        },
+    }
+    .build()?;
+
+    Ok((table, following_tokens, item_sets))
 }
--- a/crates/generate/src/build_tables/coincident_tokens.rs
+++ b/crates/generate/src/build_tables/coincident_tokens.rs
@ -1,11 +1,8 @@
+use crate::generate::grammars::LexicalGrammar;
+use crate::generate::rules::Symbol;
+use crate::generate::tables::{ParseStateId, ParseTable};
 use std::fmt;

-use crate::{
-    grammars::LexicalGrammar,
-    rules::Symbol,
-    tables::{ParseStateId, ParseTable},
-};
-
 pub struct CoincidentTokenIndex<'a> {
    entries: Vec<Vec<ParseStateId>>,
    grammar: &'a LexicalGrammar,
@ -55,7 +52,7 @@ impl<'a> CoincidentTokenIndex<'a> {
    }
 }

-impl fmt::Debug for CoincidentTokenIndex<'_> {
+impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "CoincidentTokenIndex {{")?;

--- a/crates/generate/src/build_tables/item.rs
+++ b/crates/generate/src/build_tables/item.rs
@ -1,32 +1,26 @@
-use std::{
-    cmp::Ordering,
-    fmt,
-    hash::{Hash, Hasher},
-    sync::LazyLock,
-};
+use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
+use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet};
+use lazy_static::lazy_static;
+use std::cmp::Ordering;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::u32;

-use crate::{
-    grammars::{
-        LexicalGrammar, Production, ProductionStep, ReservedWordSetId, SyntaxGrammar,
-        NO_RESERVED_WORDS,
-    },
-    rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
-};
-
-static START_PRODUCTION: LazyLock<Production> = LazyLock::new(|| Production {
-    dynamic_precedence: 0,
-    steps: vec![ProductionStep {
-        symbol: Symbol {
-            index: 0,
-            kind: SymbolType::NonTerminal,
-        },
-        precedence: Precedence::None,
-        associativity: None,
-        alias: None,
-        field_name: None,
-        reserved_word_set_id: NO_RESERVED_WORDS,
-    }],
-});
+lazy_static! {
+    static ref START_PRODUCTION: Production = Production {
+        dynamic_precedence: 0,
+        steps: vec![ProductionStep {
+            symbol: Symbol {
+                index: 0,
+                kind: SymbolType::NonTerminal,
+            },
+            precedence: Precedence::None,
+            associativity: None,
+            alias: None,
+            field_name: None,
+        }],
+    };
+}

 /// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
 #[derive(Clone, Copy, Debug)]
@ -59,14 +53,7 @@ pub struct ParseItem<'a> {
 /// to a state in the final parse table.
 #[derive(Clone, Debug, PartialEq, Eq, Default)]
 pub struct ParseItemSet<'a> {
-    pub entries: Vec<ParseItemSetEntry<'a>>,
-}
-
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct ParseItemSetEntry<'a> {
-    pub item: ParseItem<'a>,
-    pub lookaheads: TokenSet,
-    pub following_reserved_word_set: ReservedWordSetId,
+    pub entries: Vec<(ParseItem<'a>, TokenSet)>,
 }

 /// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
@ -141,7 +128,7 @@ impl<'a> ParseItem<'a> {

    /// Create an item like this one, but advanced by one step.
    #[must_use]
-    pub const fn successor(&self) -> Self {
+    pub const fn successor(&self) -> ParseItem<'a> {
        ParseItem {
            variable_index: self.variable_index,
            production: self.production,
@ -152,7 +139,7 @@ impl<'a> ParseItem<'a> {

    /// Create an item identical to this one, but with a different production.
    /// This is used when dynamically "inlining" certain symbols in a production.
-    pub const fn substitute_production(&self, production: &'a Production) -> Self {
+    pub const fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
        let mut result = *self;
        result.production = production;
        result
@ -160,31 +147,35 @@ impl<'a> ParseItem<'a> {
 }

 impl<'a> ParseItemSet<'a> {
-    pub fn insert(&mut self, item: ParseItem<'a>) -> &mut ParseItemSetEntry<'a> {
-        match self.entries.binary_search_by(|e| e.item.cmp(&item)) {
+    pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
+        let mut result = Self::default();
+        for (item, lookaheads) in elements {
+            result.insert(item, &lookaheads);
+        }
+        result
+    }
+
+    pub fn insert(&mut self, item: ParseItem<'a>, lookaheads: &TokenSet) -> &mut TokenSet {
+        match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) {
            Err(i) => {
-                self.entries.insert(
-                    i,
-                    ParseItemSetEntry {
-                        item,
-                        lookaheads: TokenSet::new(),
-                        following_reserved_word_set: ReservedWordSetId::default(),
-                    },
-                );
-                &mut self.entries[i]
+                self.entries.insert(i, (item, lookaheads.clone()));
+                &mut self.entries[i].1
+            }
+            Ok(i) => {
+                self.entries[i].1.insert_all(lookaheads);
+                &mut self.entries[i].1
            }
-            Ok(i) => &mut self.entries[i],
        }
    }

    pub fn core(&self) -> ParseItemSetCore<'a> {
        ParseItemSetCore {
-            entries: self.entries.iter().map(|e| e.item).collect(),
+            entries: self.entries.iter().map(|e| e.0).collect(),
        }
    }
 }

-impl fmt::Display for ParseItemDisplay<'_> {
+impl<'a> fmt::Display for ParseItemDisplay<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        if self.0.is_augmented() {
            write!(f, "START →")?;
@ -192,42 +183,35 @@ impl fmt::Display for ParseItemDisplay<'_> {
            write!(
                f,
                "{} →",
-                self.1.variables[self.0.variable_index as usize].name
+                &self.1.variables[self.0.variable_index as usize].name
            )?;
        }

        for (i, step) in self.0.production.steps.iter().enumerate() {
            if i == self.0.step_index as usize {
                write!(f, " •")?;
-                if !step.precedence.is_none()
-                    || step.associativity.is_some()
-                    || step.reserved_word_set_id != ReservedWordSetId::default()
-                {
-                    write!(f, " (")?;
-                    if !step.precedence.is_none() {
-                        write!(f, " {}", step.precedence)?;
+                if let Some(associativity) = step.associativity {
+                    if step.precedence.is_none() {
+                        write!(f, " ({associativity:?})")?;
+                    } else {
+                        write!(f, " ({} {associativity:?})", step.precedence)?;
                    }
-                    if let Some(associativity) = step.associativity {
-                        write!(f, " {associativity:?}")?;
-                    }
-                    if step.reserved_word_set_id != ReservedWordSetId::default() {
-                        write!(f, "reserved: {}", step.reserved_word_set_id)?;
-                    }
-                    write!(f, " )")?;
+                } else if !step.precedence.is_none() {
+                    write!(f, " ({})", step.precedence)?;
                }
            }

            write!(f, " ")?;
            if step.symbol.is_terminal() {
                if let Some(variable) = self.2.variables.get(step.symbol.index) {
-                    write!(f, "{}", variable.name)?;
+                    write!(f, "{}", &variable.name)?;
                } else {
                    write!(f, "terminal-{}", step.symbol.index)?;
                }
            } else if step.symbol.is_external() {
-                write!(f, "{}", self.1.external_tokens[step.symbol.index].name)?;
+                write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
            } else {
-                write!(f, "{}", self.1.variables[step.symbol.index].name)?;
+                write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
            }

            if let Some(alias) = &step.alias {
@ -254,33 +238,7 @@ impl fmt::Display for ParseItemDisplay<'_> {
    }
 }

-const fn escape_invisible(c: char) -> Option<&'static str> {
-    Some(match c {
-        '\n' => "\\n",
-        '\r' => "\\r",
-        '\t' => "\\t",
-        '\0' => "\\0",
-        '\\' => "\\\\",
-        '\x0b' => "\\v",
-        '\x0c' => "\\f",
-        _ => return None,
-    })
-}
-
-fn display_variable_name(source: &str) -> String {
-    source
-        .chars()
-        .fold(String::with_capacity(source.len()), |mut acc, c| {
-            if let Some(esc) = escape_invisible(c) {
-                acc.push_str(esc);
-            } else {
-                acc.push(c);
-            }
-            acc
-        })
-}
-
-impl fmt::Display for TokenSetDisplay<'_> {
+impl<'a> fmt::Display for TokenSetDisplay<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "[")?;
        for (i, symbol) in self.0.iter().enumerate() {
@ -290,14 +248,14 @@ impl fmt::Display for TokenSetDisplay<'_> {

            if symbol.is_terminal() {
                if let Some(variable) = self.2.variables.get(symbol.index) {
-                    write!(f, "{}", display_variable_name(&variable.name))?;
+                    write!(f, "{}", &variable.name)?;
                } else {
                    write!(f, "terminal-{}", symbol.index)?;
                }
            } else if symbol.is_external() {
-                write!(f, "{}", self.1.external_tokens[symbol.index].name)?;
+                write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
            } else {
-                write!(f, "{}", self.1.variables[symbol.index].name)?;
+                write!(f, "{}", &self.1.variables[symbol.index].name)?;
            }
        }
        write!(f, "]")?;
@ -305,29 +263,21 @@ impl fmt::Display for TokenSetDisplay<'_> {
    }
 }

-impl fmt::Display for ParseItemSetDisplay<'_> {
+impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
-        for entry in &self.0.entries {
-            write!(
+        for (item, lookaheads) in &self.0.entries {
+            writeln!(
                f,
                "{}\t{}",
-                ParseItemDisplay(&entry.item, self.1, self.2),
-                TokenSetDisplay(&entry.lookaheads, self.1, self.2),
+                ParseItemDisplay(item, self.1, self.2),
+                TokenSetDisplay(lookaheads, self.1, self.2)
            )?;
-            if entry.following_reserved_word_set != ReservedWordSetId::default() {
-                write!(
-                    f,
-                    "\treserved word set: {}",
-                    entry.following_reserved_word_set
-                )?;
-            }
-            writeln!(f)?;
        }
        Ok(())
    }
 }

-impl Hash for ParseItem<'_> {
+impl<'a> Hash for ParseItem<'a> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_u32(self.variable_index);
        hasher.write_u32(self.step_index);
@ -341,7 +291,7 @@ impl Hash for ParseItem<'_> {
        // this item, unless any of the following are true:
        //   * the children have fields
        //   * the children have aliases
-        //   * the children are hidden and represent rules that have fields.
+        //   * the children are hidden and represent rules that have fields.
        // See the docs for `has_preceding_inherited_fields`.
        for step in &self.production.steps[0..self.step_index as usize] {
            step.alias.hash(hasher);
@ -356,7 +306,7 @@ impl Hash for ParseItem<'_> {
    }
 }

-impl PartialEq for ParseItem<'_> {
+impl<'a> PartialEq for ParseItem<'a> {
    fn eq(&self, other: &Self) -> bool {
        if self.variable_index != other.variable_index
            || self.step_index != other.step_index
@ -393,7 +343,7 @@ impl PartialEq for ParseItem<'_> {
    }
 }

-impl Ord for ParseItem<'_> {
+impl<'a> Ord for ParseItem<'a> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.step_index
            .cmp(&other.step_index)
@ -433,26 +383,25 @@ impl Ord for ParseItem<'_> {
    }
 }

-impl PartialOrd for ParseItem<'_> {
+impl<'a> PartialOrd for ParseItem<'a> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }

-impl Eq for ParseItem<'_> {}
+impl<'a> Eq for ParseItem<'a> {}

-impl Hash for ParseItemSet<'_> {
+impl<'a> Hash for ParseItemSet<'a> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_usize(self.entries.len());
-        for entry in &self.entries {
-            entry.item.hash(hasher);
-            entry.lookaheads.hash(hasher);
-            entry.following_reserved_word_set.hash(hasher);
+        for (item, lookaheads) in &self.entries {
+            item.hash(hasher);
+            lookaheads.hash(hasher);
        }
    }
 }

-impl Hash for ParseItemSetCore<'_> {
+impl<'a> Hash for ParseItemSetCore<'a> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_usize(self.entries.len());
        for item in &self.entries {
--- a/crates/generate/src/build_tables/item_set_builder.rs
+++ b/crates/generate/src/build_tables/item_set_builder.rs
@ -1,13 +1,8 @@
-use std::{
-    collections::{HashMap, HashSet},
-    fmt,
-};
-
-use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, ParseItemSetEntry, TokenSetDisplay};
-use crate::{
-    grammars::{InlinedProductionMap, LexicalGrammar, ReservedWordSetId, SyntaxGrammar},
-    rules::{Symbol, SymbolType, TokenSet},
-};
+use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
+use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
+use crate::generate::rules::{Symbol, SymbolType, TokenSet};
+use std::collections::{HashMap, HashSet};
+use std::fmt;

 #[derive(Clone, Debug, PartialEq, Eq)]
 struct TransitiveClosureAddition<'a> {
@ -15,10 +10,9 @@ struct TransitiveClosureAddition<'a> {
    info: FollowSetInfo,
 }

-#[derive(Clone, Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 struct FollowSetInfo {
    lookaheads: TokenSet,
-    reserved_lookaheads: ReservedWordSetId,
    propagates_lookaheads: bool,
 }

@ -26,7 +20,6 @@ pub struct ParseItemSetBuilder<'a> {
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    first_sets: HashMap<Symbol, TokenSet>,
-    reserved_first_sets: HashMap<Symbol, ReservedWordSetId>,
    last_sets: HashMap<Symbol, TokenSet>,
    inlines: &'a InlinedProductionMap,
    transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
@ -48,7 +41,6 @@ impl<'a> ParseItemSetBuilder<'a> {
            syntax_grammar,
            lexical_grammar,
            first_sets: HashMap::new(),
-            reserved_first_sets: HashMap::new(),
            last_sets: HashMap::new(),
            inlines,
            transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
@ -57,7 +49,8 @@ impl<'a> ParseItemSetBuilder<'a> {
        // For each grammar symbol, populate the FIRST and LAST sets: the set of
        // terminals that appear at the beginning and end that symbol's productions,
        // respectively.
-        // For a terminal symbol, the FIRST and LAST sets just consist of the
+        //
+        // For a terminal symbol, the FIRST and LAST sets just consist of the
        // terminal itself.
        for i in 0..lexical_grammar.variables.len() {
            let symbol = Symbol::terminal(i);
@ -65,9 +58,6 @@ impl<'a> ParseItemSetBuilder<'a> {
            set.insert(symbol);
            result.first_sets.insert(symbol, set.clone());
            result.last_sets.insert(symbol, set);
-            result
-                .reserved_first_sets
-                .insert(symbol, ReservedWordSetId::default());
        }

        for i in 0..syntax_grammar.external_tokens.len() {
@ -76,15 +66,12 @@ impl<'a> ParseItemSetBuilder<'a> {
            set.insert(symbol);
            result.first_sets.insert(symbol, set.clone());
            result.last_sets.insert(symbol, set);
-            result
-                .reserved_first_sets
-                .insert(symbol, ReservedWordSetId::default());
        }

-        // The FIRST set of a non-terminal `i` is the union of the FIRST sets
-        // of all the symbols that appear at the beginnings of i's productions. Some
-        // of these symbols may themselves be non-terminals, so this is a recursive
-        // definition.
+        // The FIRST set of a non-terminal `i` is the union of the following sets:
+        // * the set of all terminals that appear at the beginnings of i's productions
+        // * the FIRST sets of all the non-terminals that appear at the beginnings
+        //   of i's productions
        //
        // Rather than computing these sets using recursion, we use an explicit stack
        // called `symbols_to_process`.
@ -92,36 +79,37 @@ impl<'a> ParseItemSetBuilder<'a> {
        let mut processed_non_terminals = HashSet::new();
        for i in 0..syntax_grammar.variables.len() {
            let symbol = Symbol::non_terminal(i);
-            let first_set = result.first_sets.entry(symbol).or_default();
-            let reserved_first_set = result.reserved_first_sets.entry(symbol).or_default();

+            let first_set = result
+                .first_sets
+                .entry(symbol)
+                .or_insert_with(TokenSet::new);
            processed_non_terminals.clear();
            symbols_to_process.clear();
            symbols_to_process.push(symbol);
-            while let Some(sym) = symbols_to_process.pop() {
-                for production in &syntax_grammar.variables[sym.index].productions {
-                    if let Some(step) = production.steps.first() {
-                        if step.symbol.is_terminal() || step.symbol.is_external() {
-                            first_set.insert(step.symbol);
-                        } else if processed_non_terminals.insert(step.symbol) {
+            while let Some(current_symbol) = symbols_to_process.pop() {
+                if current_symbol.is_terminal() || current_symbol.is_external() {
+                    first_set.insert(current_symbol);
+                } else if processed_non_terminals.insert(current_symbol) {
+                    for production in &syntax_grammar.variables[current_symbol.index].productions {
+                        if let Some(step) = production.steps.first() {
                            symbols_to_process.push(step.symbol);
                        }
-                        *reserved_first_set = (*reserved_first_set).max(step.reserved_word_set_id);
                    }
                }
            }

            // The LAST set is defined in a similar way to the FIRST set.
-            let last_set = result.last_sets.entry(symbol).or_default();
+            let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new);
            processed_non_terminals.clear();
            symbols_to_process.clear();
            symbols_to_process.push(symbol);
-            while let Some(sym) = symbols_to_process.pop() {
-                for production in &syntax_grammar.variables[sym.index].productions {
-                    if let Some(step) = production.steps.last() {
-                        if step.symbol.is_terminal() || step.symbol.is_external() {
-                            last_set.insert(step.symbol);
-                        } else if processed_non_terminals.insert(step.symbol) {
+            while let Some(current_symbol) = symbols_to_process.pop() {
+                if current_symbol.is_terminal() || current_symbol.is_external() {
+                    last_set.insert(current_symbol);
+                } else if processed_non_terminals.insert(current_symbol) {
+                    for production in &syntax_grammar.variables[current_symbol.index].productions {
+                        if let Some(step) = production.steps.last() {
                            symbols_to_process.push(step.symbol);
                        }
                    }
@ -131,75 +119,67 @@ impl<'a> ParseItemSetBuilder<'a> {

        // To compute an item set's transitive closure, we find each item in the set
        // whose next symbol is a non-terminal, and we add new items to the set for
-        // each of that symbol's productions. These productions might themselves begin
+        // each of that symbol's productions. These productions might themselves begin
        // with non-terminals, so the process continues recursively. In this process,
        // the total set of entries that get added depends only on two things:
-        //
-        //   * the non-terminal symbol that occurs next in each item
-        //
-        //   * the set of terminals that can follow that non-terminal symbol in the item
+        //   * the set of non-terminal symbols that occur at each item's current position
+        //   * the set of terminals that occurs after each of these non-terminal symbols
        //
        // So we can avoid a lot of duplicated recursive work by precomputing, for each
        // non-terminal symbol `i`, a final list of *additions* that must be made to an
-        // item set when symbol `i` occurs as the next symbol in one if its core items.
-        // The structure of a precomputed *addition* is as follows:
+        // item set when `i` occurs as the next symbol in one of its core items. The
+        // structure of an *addition* is as follows:
+        //   * `item` - the new item that must be added as part of the expansion of `i`
+        //   * `lookaheads` - lookahead tokens that can always come after that item in
+        //      the expansion of `i`
+        //   * `propagates_lookaheads` - a boolean indicating whether or not `item` can
+        //      occur at the *end* of the expansion of `i`, so that i's own current
+        //      lookahead tokens can occur after `item`.
        //
-        //   * `item` - the new item that must be added as part of the expansion of the symbol `i`.
-        //
-        //   * `lookaheads` - the set of possible lookahead tokens that can always come after `item`
-        //     in an expansion of symbol `i`.
-        //
-        //   * `reserved_lookaheads` - the set of reserved lookahead lookahead tokens that can
-        //     always come after `item` in the expansion of symbol `i`.
-        //
-        //   * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
-        //     *end* of the expansion of symbol `i`, so that i's own current lookahead tokens can
-        //     occur after `item`.
-        //
-        // Rather than computing these additions recursively, we use an explicit stack.
-        let empty_lookaheads = TokenSet::new();
-        let mut stack = Vec::new();
-        let mut follow_set_info_by_non_terminal = HashMap::<usize, FollowSetInfo>::new();
+        // Again, rather than computing these additions recursively, we use an explicit
+        // stack called `entries_to_process`.
        for i in 0..syntax_grammar.variables.len() {
+            let empty_lookaheads = TokenSet::new();
+            let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
+
            // First, build up a map whose keys are all of the non-terminals that can
            // appear at the beginning of non-terminal `i`, and whose values store
-            // information about the tokens that can follow those non-terminals.
-            stack.clear();
-            stack.push((i, &empty_lookaheads, ReservedWordSetId::default(), true));
-            follow_set_info_by_non_terminal.clear();
-            while let Some((sym_ix, lookaheads, reserved_word_set_id, propagates_lookaheads)) =
-                stack.pop()
-            {
-                let mut did_add = false;
-                let info = follow_set_info_by_non_terminal.entry(sym_ix).or_default();
-                did_add |= info.lookaheads.insert_all(lookaheads);
-                if reserved_word_set_id > info.reserved_lookaheads {
-                    info.reserved_lookaheads = reserved_word_set_id;
-                    did_add = true;
-                }
-                did_add |= propagates_lookaheads && !info.propagates_lookaheads;
-                info.propagates_lookaheads |= propagates_lookaheads;
-                if !did_add {
-                    continue;
+            // information about the tokens that can follow each non-terminal.
+            let mut follow_set_info_by_non_terminal = HashMap::new();
+            while let Some(entry) = entries_to_process.pop() {
+                let (variable_index, lookaheads, propagates_lookaheads) = entry;
+                let existing_info = follow_set_info_by_non_terminal
+                    .entry(variable_index)
+                    .or_insert_with(|| FollowSetInfo {
+                        lookaheads: TokenSet::new(),
+                        propagates_lookaheads: false,
+                    });
+
+                let did_add_follow_set_info;
+                if propagates_lookaheads {
+                    did_add_follow_set_info = !existing_info.propagates_lookaheads;
+                    existing_info.propagates_lookaheads = true;
+                } else {
+                    did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
                }

-                for production in &syntax_grammar.variables[sym_ix].productions {
-                    if let Some(symbol) = production.first_symbol() {
-                        if symbol.is_non_terminal() {
-                            if let Some(next_step) = production.steps.get(1) {
-                                stack.push((
-                                    symbol.index,
-                                    &result.first_sets[&next_step.symbol],
-                                    result.reserved_first_sets[&next_step.symbol],
-                                    false,
-                                ));
-                            } else {
-                                stack.push((
-                                    symbol.index,
-                                    lookaheads,
-                                    reserved_word_set_id,
-                                    propagates_lookaheads,
-                                ));
+                if did_add_follow_set_info {
+                    for production in &syntax_grammar.variables[variable_index].productions {
+                        if let Some(symbol) = production.first_symbol() {
+                            if symbol.is_non_terminal() {
+                                if production.steps.len() == 1 {
+                                    entries_to_process.push((
+                                        symbol.index,
+                                        lookaheads,
+                                        propagates_lookaheads,
+                                    ));
+                                } else {
+                                    entries_to_process.push((
+                                        symbol.index,
+                                        &result.first_sets[&production.steps[1].symbol],
+                                        false,
+                                    ));
+                                }
                            }
                        }
                    }
@ -209,7 +189,7 @@ impl<'a> ParseItemSetBuilder<'a> {
            // Store all of those non-terminals' productions, along with their associated
            // lookahead info, as *additions* associated with non-terminal `i`.
            let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
-            for (&variable_index, follow_set_info) in &follow_set_info_by_non_terminal {
+            for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
                let variable = &syntax_grammar.variables[variable_index];
                let non_terminal = Symbol::non_terminal(variable_index);
                let variable_index = variable_index as u32;
@ -252,25 +232,22 @@ impl<'a> ParseItemSetBuilder<'a> {
        result
    }

-    pub fn transitive_closure(&self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
+    pub fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
        let mut result = ParseItemSet::default();
-        for entry in &item_set.entries {
+        for (item, lookaheads) in &item_set.entries {
            if let Some(productions) = self
                .inlines
-                .inlined_productions(entry.item.production, entry.item.step_index)
+                .inlined_productions(item.production, item.step_index)
            {
                for production in productions {
                    self.add_item(
                        &mut result,
-                        &ParseItemSetEntry {
-                            item: entry.item.substitute_production(production),
-                            lookaheads: entry.lookaheads.clone(),
-                            following_reserved_word_set: entry.following_reserved_word_set,
-                        },
+                        item.substitute_production(production),
+                        lookaheads,
                    );
                }
            } else {
-                self.add_item(&mut result, entry);
+                self.add_item(&mut result, *item, lookaheads);
            }
        }
        result
@ -280,68 +257,34 @@ impl<'a> ParseItemSetBuilder<'a> {
        &self.first_sets[symbol]
    }

-    pub fn reserved_first_set(&self, symbol: &Symbol) -> Option<&TokenSet> {
-        let id = *self.reserved_first_sets.get(symbol)?;
-        Some(&self.syntax_grammar.reserved_word_sets[id.0])
-    }
-
    pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
        &self.last_sets[symbol]
    }

-    fn add_item(&self, set: &mut ParseItemSet<'a>, entry: &ParseItemSetEntry<'a>) {
-        if let Some(step) = entry.item.step() {
+    fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
+        if let Some(step) = item.step() {
            if step.symbol.is_non_terminal() {
-                let next_step = entry.item.successor().step();
+                let next_step = item.successor().step();

                // Determine which tokens can follow this non-terminal.
-                let (following_tokens, following_reserved_tokens) =
-                    if let Some(next_step) = next_step {
-                        (
-                            self.first_sets.get(&next_step.symbol).unwrap(),
-                            *self.reserved_first_sets.get(&next_step.symbol).unwrap(),
-                        )
-                    } else {
-                        (&entry.lookaheads, entry.following_reserved_word_set)
-                    };
+                let following_tokens = next_step.map_or(lookaheads, |next_step| {
+                    self.first_sets.get(&next_step.symbol).unwrap()
+                });

                // Use the pre-computed *additions* to expand the non-terminal.
                for addition in &self.transitive_closure_additions[step.symbol.index] {
-                    let entry = set.insert(addition.item);
-                    entry.lookaheads.insert_all(&addition.info.lookaheads);
-
-                    if let Some(word_token) = self.syntax_grammar.word_token {
-                        if addition.info.lookaheads.contains(&word_token) {
-                            entry.following_reserved_word_set = entry
-                                .following_reserved_word_set
-                                .max(addition.info.reserved_lookaheads);
-                        }
-                    }
-
+                    let lookaheads = set.insert(addition.item, &addition.info.lookaheads);
                    if addition.info.propagates_lookaheads {
-                        entry.lookaheads.insert_all(following_tokens);
-
-                        if let Some(word_token) = self.syntax_grammar.word_token {
-                            if following_tokens.contains(&word_token) {
-                                entry.following_reserved_word_set = entry
-                                    .following_reserved_word_set
-                                    .max(following_reserved_tokens);
-                            }
-                        }
+                        lookaheads.insert_all(following_tokens);
                    }
                }
            }
        }
-
-        let e = set.insert(entry.item);
-        e.lookaheads.insert_all(&entry.lookaheads);
-        e.following_reserved_word_set = e
-            .following_reserved_word_set
-            .max(entry.following_reserved_word_set);
+        set.insert(item, lookaheads);
    }
 }

-impl fmt::Debug for ParseItemSetBuilder<'_> {
+impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "ParseItemSetBuilder {{")?;

--- a/crates/generate/src/build_tables/minimize_parse_table.rs
+++ b/crates/generate/src/build_tables/minimize_parse_table.rs
@ -1,18 +1,13 @@
-use std::{
-    collections::{HashMap, HashSet},
-    mem,
-};
-
-use log::debug;
-
 use super::token_conflicts::TokenConflictMap;
-use crate::{
-    dedup::split_state_id_groups,
-    grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
-    rules::{AliasMap, Symbol, TokenSet},
-    tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry},
-    OptLevel,
+use crate::generate::dedup::split_state_id_groups;
+use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
+use crate::generate::rules::{AliasMap, Symbol, TokenSet};
+use crate::generate::tables::{
+    GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
 };
+use log::info;
+use std::collections::{HashMap, HashSet};
+use std::mem;

 pub fn minimize_parse_table(
    parse_table: &mut ParseTable,
@ -21,7 +16,6 @@ pub fn minimize_parse_table(
    simple_aliases: &AliasMap,
    token_conflict_map: &TokenConflictMap,
    keywords: &TokenSet,
-    optimizations: OptLevel,
 ) {
    let mut minimizer = Minimizer {
        parse_table,
@ -31,9 +25,7 @@ pub fn minimize_parse_table(
        keywords,
        simple_aliases,
    };
-    if optimizations.contains(OptLevel::MergeStates) {
-        minimizer.merge_compatible_states();
-    }
+    minimizer.merge_compatible_states();
    minimizer.remove_unit_reductions();
    minimizer.remove_unused_states();
    minimizer.reorder_states_by_descending_size();
@ -48,7 +40,7 @@ struct Minimizer<'a> {
    simple_aliases: &'a AliasMap,
 }

-impl Minimizer<'_> {
+impl<'a> Minimizer<'a> {
    fn remove_unit_reductions(&mut self) {
        let mut aliased_symbols = HashSet::new();
        for variable in &self.syntax_grammar.variables {
@ -74,17 +66,18 @@ impl Minimizer<'_> {
                            production_id: 0,
                            symbol,
                            ..
-                        } if !self.simple_aliases.contains_key(symbol)
-                            && !self.syntax_grammar.supertype_symbols.contains(symbol)
-                            && !self.syntax_grammar.extra_symbols.contains(symbol)
-                            && !aliased_symbols.contains(symbol)
-                            && self.syntax_grammar.variables[symbol.index].kind
-                                != VariableType::Named
-                            && (unit_reduction_symbol.is_none()
-                                || unit_reduction_symbol == Some(symbol)) =>
-                        {
-                            unit_reduction_symbol = Some(symbol);
-                            continue;
+                        } => {
+                            if !self.simple_aliases.contains_key(symbol)
+                                && !self.syntax_grammar.supertype_symbols.contains(symbol)
+                                && !aliased_symbols.contains(symbol)
+                                && self.syntax_grammar.variables[symbol.index].kind
+                                    != VariableType::Named
+                                && (unit_reduction_symbol.is_none()
+                                    || unit_reduction_symbol == Some(symbol))
+                            {
+                                unit_reduction_symbol = Some(symbol);
+                                continue;
+                            }
                        }
                        _ => {}
                    }
@ -155,7 +148,9 @@ impl Minimizer<'_> {
            &mut group_ids_by_state_id,
            0,
            |left, right, groups| self.state_successors_differ(left, right, groups),
-        ) {}
+        ) {
+            continue;
+        }

        let error_group_index = state_ids_by_group_id
            .iter()
@ -172,12 +167,17 @@ impl Minimizer<'_> {
        let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
        for state_ids in &state_ids_by_group_id {
            // Initialize the new state based on the first old state in the group.
-            let mut parse_state = mem::take(&mut self.parse_table.states[state_ids[0]]);
+            let mut parse_state = ParseState::default();
+            mem::swap(&mut parse_state, &mut self.parse_table.states[state_ids[0]]);

            // Extend the new state with all of the actions from the other old states
            // in the group.
            for state_id in &state_ids[1..] {
-                let other_parse_state = mem::take(&mut self.parse_table.states[*state_id]);
+                let mut other_parse_state = ParseState::default();
+                mem::swap(
+                    &mut other_parse_state,
+                    &mut self.parse_table.states[*state_id],
+                );

                parse_state
                    .terminal_entries
@ -185,12 +185,6 @@ impl Minimizer<'_> {
                parse_state
                    .nonterminal_entries
                    .extend(other_parse_state.nonterminal_entries);
-                parse_state
-                    .reserved_words
-                    .insert_all(&other_parse_state.reserved_words);
-                for symbol in parse_state.terminal_entries.keys() {
-                    parse_state.reserved_words.remove(symbol);
-                }
            }

            // Update the new state's outgoing references using the new grouping.
@ -219,14 +213,24 @@ impl Minimizer<'_> {
                ) {
                    return true;
                }
-            } else if self.token_conflicts(left_state.id, right_state.id, right_state, *token) {
+            } else if self.token_conflicts(
+                left_state.id,
+                right_state.id,
+                right_state.terminal_entries.keys(),
+                *token,
+            ) {
                return true;
            }
        }

        for token in right_state.terminal_entries.keys() {
            if !left_state.terminal_entries.contains_key(token)
-                && self.token_conflicts(left_state.id, right_state.id, left_state, *token)
+                && self.token_conflicts(
+                    left_state.id,
+                    right_state.id,
+                    left_state.terminal_entries.keys(),
+                    *token,
+                )
            {
                return true;
            }
@ -248,7 +252,7 @@ impl Minimizer<'_> {
                        let group1 = group_ids_by_state_id[*s1];
                        let group2 = group_ids_by_state_id[*s2];
                        if group1 != group2 {
-                            debug!(
+                            info!(
                                "split states {} {} - successors for {} are split: {s1} {s2}",
                                state1.id,
                                state2.id,
@ -264,12 +268,12 @@ impl Minimizer<'_> {
        for (symbol, s1) in &state1.nonterminal_entries {
            if let Some(s2) = state2.nonterminal_entries.get(symbol) {
                match (s1, s2) {
-                    (GotoAction::ShiftExtra, GotoAction::ShiftExtra) => {}
+                    (GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
                    (GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
                        let group1 = group_ids_by_state_id[*s1];
                        let group2 = group_ids_by_state_id[*s2];
                        if group1 != group2 {
-                            debug!(
+                            info!(
                                "split states {} {} - successors for {} are split: {s1} {s2}",
                                state1.id,
                                state2.id,
@ -299,14 +303,16 @@ impl Minimizer<'_> {
        let actions1 = &entry1.actions;
        let actions2 = &entry2.actions;
        if actions1.len() != actions2.len() {
-            debug!(
+            info!(
                "split states {state_id1} {state_id2} - differing action counts for token {}",
                self.symbol_name(token)
            );
            return true;
        }

-        for (action1, action2) in actions1.iter().zip(actions2.iter()) {
+        for (i, action1) in actions1.iter().enumerate() {
+            let action2 = &actions2[i];
+
            // Two shift actions are equivalent if their destinations are in the same group.
            if let (
                ParseAction::Shift {
@ -324,13 +330,13 @@ impl Minimizer<'_> {
                if group1 == group2 && is_repetition1 == is_repetition2 {
                    continue;
                }
-                debug!(
+                info!(
                    "split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
                    self.symbol_name(token),
                );
                return true;
            } else if action1 != action2 {
-                debug!(
+                info!(
                    "split states {state_id1} {state_id2} - unequal actions for {}",
                    self.symbol_name(token),
                );
@ -341,32 +347,28 @@ impl Minimizer<'_> {
        false
    }

-    fn token_conflicts(
+    fn token_conflicts<'b>(
        &self,
        left_id: ParseStateId,
        right_id: ParseStateId,
-        right_state: &ParseState,
+        existing_tokens: impl Iterator<Item = &'b Symbol>,
        new_token: Symbol,
    ) -> bool {
        if new_token == Symbol::end_of_nonterminal_extra() {
-            debug!("split states {left_id} {right_id} - end of non-terminal extra",);
+            info!("split states {left_id} {right_id} - end of non-terminal extra",);
            return true;
        }

        // Do not add external tokens; they could conflict lexically with any of the state's
        // existing lookahead tokens.
        if new_token.is_external() {
-            debug!(
+            info!(
                "split states {left_id} {right_id} - external token {}",
                self.symbol_name(&new_token),
            );
            return true;
        }

-        if right_state.reserved_words.contains(&new_token) {
-            return false;
-        }
-
        // Do not add tokens which are both internal and external. Their validity could
        // influence the behavior of the external scanner.
        if self
@ -375,7 +377,7 @@ impl Minimizer<'_> {
            .iter()
            .any(|external| external.corresponding_internal_token == Some(new_token))
        {
-            debug!(
+            info!(
                "split states {left_id} {right_id} - internal/external token {}",
                self.symbol_name(&new_token),
            );
@ -383,30 +385,23 @@ impl Minimizer<'_> {
        }

        // Do not add a token if it conflicts with an existing token.
-        for token in right_state.terminal_entries.keys().copied() {
-            if !token.is_terminal() {
-                continue;
-            }
-            if self.syntax_grammar.word_token == Some(token) && self.keywords.contains(&new_token) {
-                continue;
-            }
-            if self.syntax_grammar.word_token == Some(new_token) && self.keywords.contains(&token) {
-                continue;
-            }
-
-            if self
-                .token_conflict_map
-                .does_conflict(new_token.index, token.index)
-                || self
+        for token in existing_tokens {
+            if token.is_terminal()
+                && !(self.syntax_grammar.word_token == Some(*token)
+                    && self.keywords.contains(&new_token))
+                && !(self.syntax_grammar.word_token == Some(new_token)
+                    && self.keywords.contains(token))
+                && (self
                    .token_conflict_map
-                    .does_match_same_string(new_token.index, token.index)
+                    .does_conflict(new_token.index, token.index)
+                    || self
+                        .token_conflict_map
+                        .does_match_same_string(new_token.index, token.index))
            {
-                debug!(
-                    "split states {} {} - token {} conflicts with {}",
-                    left_id,
-                    right_id,
+                info!(
+                    "split states {left_id} {right_id} - token {} conflicts with {}",
                    self.symbol_name(&new_token),
-                    self.symbol_name(&token),
+                    self.symbol_name(token),
                );
                return true;
            }
--- a/cli/src/generate/build_tables/mod.rs
+++ b/cli/src/generate/build_tables/mod.rs
@ -1,42 +1,25 @@
-mod build_lex_table;
-mod build_parse_table;
+pub mod build_lex_table;
+pub mod build_parse_table;
 mod coincident_tokens;
 mod item;
 mod item_set_builder;
 mod minimize_parse_table;
 mod token_conflicts;

+use self::build_lex_table::build_lex_table;
+use self::build_parse_table::{build_parse_table, ParseStateInfo};
+use self::coincident_tokens::CoincidentTokenIndex;
+use self::minimize_parse_table::minimize_parse_table;
+use self::token_conflicts::TokenConflictMap;
+use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
+use crate::generate::nfa::NfaCursor;
+use crate::generate::node_types::VariableInfo;
+use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
+use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
+use anyhow::Result;
+use log::info;
 use std::collections::{BTreeSet, HashMap};

-pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
-use build_parse_table::BuildTableResult;
-pub use build_parse_table::ParseTableBuilderError;
-use log::{debug, info};
-
-use self::{
-    build_lex_table::build_lex_table,
-    build_parse_table::{build_parse_table, ParseStateInfo},
-    coincident_tokens::CoincidentTokenIndex,
-    item_set_builder::ParseItemSetBuilder,
-    minimize_parse_table::minimize_parse_table,
-    token_conflicts::TokenConflictMap,
-};
-use crate::{
-    grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
-    nfa::{CharacterSet, NfaCursor},
-    node_types::VariableInfo,
-    rules::{AliasMap, Symbol, SymbolType, TokenSet},
-    tables::{LexTable, ParseAction, ParseTable, ParseTableEntry},
-    OptLevel,
-};
-
-pub struct Tables {
-    pub parse_table: ParseTable,
-    pub main_lex_table: LexTable,
-    pub keyword_lex_table: LexTable,
-    pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
-}
-
 pub fn build_tables(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
@ -44,17 +27,9 @@ pub fn build_tables(
    variable_info: &[VariableInfo],
    inlines: &InlinedProductionMap,
    report_symbol_name: Option<&str>,
-    optimizations: OptLevel,
-) -> BuildTableResult<Tables> {
-    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
-    let following_tokens =
-        get_following_tokens(syntax_grammar, lexical_grammar, inlines, &item_set_builder);
-    let (mut parse_table, parse_state_info) = build_parse_table(
-        syntax_grammar,
-        lexical_grammar,
-        item_set_builder,
-        variable_info,
-    )?;
+) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
+    let (mut parse_table, following_tokens, parse_state_info) =
+        build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
    let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
    let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
    let keywords = identify_keywords(
@ -80,9 +55,8 @@ pub fn build_tables(
        simple_aliases,
        &token_conflict_map,
        &keywords,
-        optimizations,
    );
-    let lex_tables = build_lex_table(
+    let (main_lex_table, keyword_lex_table) = build_lex_table(
        &mut parse_table,
        syntax_grammar,
        lexical_grammar,
@ -102,58 +76,12 @@ pub fn build_tables(
            report_symbol_name,
        );
    }
-
-    if parse_table.states.len() > u16::MAX as usize {
-        Err(ParseTableBuilderError::StateCount(parse_table.states.len()))?;
-    }
-
-    Ok(Tables {
+    Ok((
        parse_table,
-        main_lex_table: lex_tables.main_lex_table,
-        keyword_lex_table: lex_tables.keyword_lex_table,
-        large_character_sets: lex_tables.large_character_sets,
-    })
-}
-
-fn get_following_tokens(
-    syntax_grammar: &SyntaxGrammar,
-    lexical_grammar: &LexicalGrammar,
-    inlines: &InlinedProductionMap,
-    builder: &ParseItemSetBuilder,
-) -> Vec<TokenSet> {
-    let mut result = vec![TokenSet::new(); lexical_grammar.variables.len()];
-    let productions = syntax_grammar
-        .variables
-        .iter()
-        .flat_map(|v| &v.productions)
-        .chain(&inlines.productions);
-    let all_tokens = (0..result.len())
-        .map(Symbol::terminal)
-        .collect::<TokenSet>();
-    for production in productions {
-        for i in 1..production.steps.len() {
-            let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
-            let right_tokens = builder.first_set(&production.steps[i].symbol);
-            let right_reserved_tokens = builder.reserved_first_set(&production.steps[i].symbol);
-            for left_token in left_tokens.iter() {
-                if left_token.is_terminal() {
-                    result[left_token.index].insert_all_terminals(right_tokens);
-                    if let Some(reserved_tokens) = right_reserved_tokens {
-                        result[left_token.index].insert_all_terminals(reserved_tokens);
-                    }
-                }
-            }
-        }
-    }
-    for extra in &syntax_grammar.extra_symbols {
-        if extra.is_terminal() {
-            for entry in &mut result {
-                entry.insert(*extra);
-            }
-            result[extra.index] = all_tokens.clone();
-        }
-    }
-    result
+        main_lex_table,
+        keyword_lex_table,
+        syntax_grammar.word_token,
+    ))
 }

 fn populate_error_state(
@ -169,7 +97,7 @@ fn populate_error_state(

    // First identify the *conflict-free tokens*: tokens that do not overlap with
    // any other token in any way, besides matching exactly the same string.
-    let conflict_free_tokens = (0..n)
+    let conflict_free_tokens: TokenSet = (0..n)
        .filter_map(|i| {
            let conflicts_with_other_tokens = (0..n).any(|j| {
                j != i
@ -179,14 +107,14 @@ fn populate_error_state(
            if conflicts_with_other_tokens {
                None
            } else {
-                debug!(
+                info!(
                    "error recovery - token {} has no conflicts",
                    lexical_grammar.variables[i].name
                );
                Some(Symbol::terminal(i))
            }
        })
-        .collect::<TokenSet>();
+        .collect();

    let recover_entry = ParseTableEntry {
        reusable: false,
@ -205,14 +133,14 @@ fn populate_error_state(
                !coincident_token_index.contains(symbol, *t)
                    && token_conflict_map.does_conflict(symbol.index, t.index)
            }) {
-                debug!(
+                info!(
                    "error recovery - exclude token {} because of conflict with {}",
                    lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
                );
                continue;
            }
        }
-        debug!(
+        info!(
            "error recovery - include token {}",
            lexical_grammar.variables[i].name
        );
@ -263,7 +191,7 @@ fn populate_used_symbols(
            // ensure that a subtree's symbol can be successfully reassigned to the word token
            // without having to move the subtree to the heap.
            // See https://github.com/tree-sitter/tree-sitter/issues/258
-            if syntax_grammar.word_token.is_some_and(|t| t.index == i) {
+            if syntax_grammar.word_token.map_or(false, |t| t.index == i) {
                parse_table.symbols.insert(1, Symbol::terminal(i));
            } else {
                parse_table.symbols.push(Symbol::terminal(i));
@ -335,7 +263,7 @@ fn identify_keywords(

    // First find all of the candidate keyword tokens: tokens that start with
    // letters or underscore and can match the same string as a word token.
-    let keyword_candidates = lexical_grammar
+    let keyword_candidates: TokenSet = lexical_grammar
        .variables
        .iter()
        .enumerate()
@ -345,7 +273,7 @@ fn identify_keywords(
                && token_conflict_map.does_match_same_string(i, word_token.index)
                && !token_conflict_map.does_match_different_string(i, word_token.index)
            {
-                debug!(
+                info!(
                    "Keywords - add candidate {}",
                    lexical_grammar.variables[i].name
                );
@ -354,17 +282,17 @@ fn identify_keywords(
                None
            }
        })
-        .collect::<TokenSet>();
+        .collect();

    // Exclude keyword candidates that shadow another keyword candidate.
-    let keywords = keyword_candidates
+    let keywords: TokenSet = keyword_candidates
        .iter()
        .filter(|token| {
            for other_token in keyword_candidates.iter() {
                if other_token != *token
                    && token_conflict_map.does_match_same_string(other_token.index, token.index)
                {
-                    debug!(
+                    info!(
                        "Keywords - exclude {} because it matches the same string as {}",
                        lexical_grammar.variables[token.index].name,
                        lexical_grammar.variables[other_token.index].name
@ -374,7 +302,7 @@ fn identify_keywords(
            }
            true
        })
-        .collect::<TokenSet>();
+        .collect();

    // Exclude keyword candidates for which substituting the keyword capture
    // token would introduce new lexical conflicts with other tokens.
@ -406,7 +334,7 @@ fn identify_keywords(
                    word_token.index,
                    other_index,
                ) {
-                    debug!(
+                    info!(
                        "Keywords - exclude {} because of conflict with {}",
                        lexical_grammar.variables[token.index].name,
                        lexical_grammar.variables[other_index].name
@ -415,7 +343,7 @@ fn identify_keywords(
                }
            }

-            debug!(
+            info!(
                "Keywords - include {}",
                lexical_grammar.variables[token.index].name,
            );
@ -469,9 +397,9 @@ fn report_state_info<'a>(
    for (i, state) in parse_table.states.iter().enumerate() {
        all_state_indices.insert(i);
        let item_set = &parse_state_info[state.id];
-        for entry in &item_set.1.entries {
-            if !entry.item.is_augmented() {
-                symbols_with_state_indices[entry.item.variable_index as usize]
+        for (item, _) in &item_set.1.entries {
+            if !item.is_augmented() {
+                symbols_with_state_indices[item.variable_index as usize]
                    .1
                    .insert(i);
            }
@ -487,14 +415,14 @@ fn report_state_info<'a>(
        .max()
        .unwrap();
    for (symbol, states) in &symbols_with_state_indices {
-        info!(
+        eprintln!(
            "{:width$}\t{}",
            syntax_grammar.variables[symbol.index].name,
            states.len(),
            width = max_symbol_name_length
        );
    }
-    info!("");
+    eprintln!();

    let state_indices = if report_symbol_name == "*" {
        Some(&all_state_indices)
@ -517,27 +445,22 @@ fn report_state_info<'a>(
        for state_index in state_indices {
            let id = parse_table.states[state_index].id;
            let (preceding_symbols, item_set) = &parse_state_info[id];
-            info!("state index: {state_index}");
-            info!("state id: {id}");
-            info!(
-                "symbol sequence: {}",
-                preceding_symbols
-                    .iter()
-                    .map(|symbol| {
-                        if symbol.is_terminal() {
-                            lexical_grammar.variables[symbol.index].name.clone()
-                        } else if symbol.is_external() {
-                            syntax_grammar.external_tokens[symbol.index].name.clone()
-                        } else {
-                            syntax_grammar.variables[symbol.index].name.clone()
-                        }
-                    })
-                    .collect::<Vec<_>>()
-                    .join(" ")
-            );
-            info!(
+            eprintln!("state index: {state_index}");
+            eprintln!("state id: {id}");
+            eprint!("symbol sequence:");
+            for symbol in preceding_symbols {
+                let name = if symbol.is_terminal() {
+                    &lexical_grammar.variables[symbol.index].name
+                } else if symbol.is_external() {
+                    &syntax_grammar.external_tokens[symbol.index].name
+                } else {
+                    &syntax_grammar.variables[symbol.index].name
+                };
+                eprint!(" {name}");
+            }
+            eprintln!(
                "\nitems:\n{}",
-                item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar),
+                self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,),
            );
        }
    }
--- a/crates/generate/src/build_tables/token_conflicts.rs
+++ b/crates/generate/src/build_tables/token_conflicts.rs
@ -1,11 +1,10 @@
-use std::{cmp::Ordering, collections::HashSet, fmt};
-
-use crate::{
-    build_tables::item::TokenSetDisplay,
-    grammars::{LexicalGrammar, SyntaxGrammar},
-    nfa::{CharacterSet, NfaCursor, NfaTransition},
-    rules::TokenSet,
-};
+use crate::generate::build_tables::item::TokenSetDisplay;
+use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
+use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
+use crate::generate::rules::TokenSet;
+use std::cmp::Ordering;
+use std::collections::HashSet;
+use std::fmt;

 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 struct TokenConflictStatus {
@ -28,7 +27,7 @@ pub struct TokenConflictMap<'a> {

 impl<'a> TokenConflictMap<'a> {
    /// Create a token conflict map based on a lexical grammar, which describes the structure
-    /// of each token, and a `following_token` map, which indicates which tokens may be appear
+    /// of each token, and a `following_token` map, which indicates which tokens may appear
    /// immediately after each other token.
    ///
    /// This analyzes the possible kinds of overlap between each pair of tokens and stores
@ -145,7 +144,7 @@ impl<'a> TokenConflictMap<'a> {
    }
 }

-impl fmt::Debug for TokenConflictMap<'_> {
+impl<'a> fmt::Debug for TokenConflictMap<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "TokenConflictMap {{")?;

@ -373,11 +372,9 @@ fn compute_conflict_status(
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::{
-        grammars::{Variable, VariableType},
-        prepare_grammar::{expand_tokens, ExtractedLexicalGrammar},
-        rules::{Precedence, Rule, Symbol},
-    };
+    use crate::generate::grammars::{Variable, VariableType};
+    use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
+    use crate::generate::rules::{Precedence, Rule, Symbol};

    #[test]
    fn test_starting_characters() {
--- a/cli/src/generate/char_tree.rs
+++ b/cli/src/generate/char_tree.rs
@ -0,0 +1,133 @@
+use std::ops::Range;
+
+/// A set of characters represented as a balanced binary tree of comparisons.
+/// This is used as an intermediate step in generating efficient code for
+/// matching a given character set. An absent (`None`) subtree means "no match".
+#[derive(PartialEq, Eq)]
+pub enum CharacterTree {
+    Yes, // leaf: the character being tested belongs to the set
+    Compare {
+        value: char, // the tested character is compared against this value
+        operator: Comparator, // which comparison to apply between the tested character and `value`
+        consequence: Option<Box<CharacterTree>>, // subtree followed when the comparison holds
+        alternative: Option<Box<CharacterTree>>, // subtree followed when the comparison fails
+    },
+}
+
+#[derive(PartialEq, Eq)]
+pub enum Comparator {
+    Less, // tested character < value
+    LessOrEqual, // tested character <= value
+    Equal, // tested character == value
+    GreaterOrEqual, // tested character >= value
+}
+
+impl CharacterTree {
+    pub fn from_ranges(ranges: &[Range<char>]) -> Option<Self> { // builds a comparison tree for `ranges`; `None` when the slice is empty
+        match ranges.len() {
+            0 => None,
+            1 => {
+                let range = &ranges[0];
+                if range.start == range.end { // one-character range: a single equality test suffices
+                    Some(Self::Compare {
+                        operator: Comparator::Equal,
+                        value: range.start,
+                        consequence: Some(Box::new(Self::Yes)),
+                        alternative: None,
+                    })
+                } else {
+                    Some(Self::Compare { // `c >= start`, then `c <= end`; any failed test falls through to `None`
+                        operator: Comparator::GreaterOrEqual,
+                        value: range.start,
+                        consequence: Some(Box::new(Self::Compare {
+                            operator: Comparator::LessOrEqual, // NOTE: `end` is tested with `<=`, i.e. treated as inclusive, unlike the usual half-open `Range`
+                            value: range.end,
+                            consequence: Some(Box::new(Self::Yes)),
+                            alternative: None,
+                        })),
+                        alternative: None,
+                    })
+                }
+            }
+            len => {
+                let mid = len / 2; // pivot on the middle range so each side gets roughly half of the ranges
+                let mid_range = &ranges[mid];
+                Some(Self::Compare {
+                    operator: Comparator::Less,
+                    value: mid_range.start,
+                    consequence: Self::from_ranges(&ranges[0..mid]).map(Box::new), // c < mid.start: only the ranges before `mid` are consulted (assumes `ranges` is sorted and non-overlapping - TODO confirm with callers)
+                    alternative: Some(Box::new(Self::Compare {
+                        operator: Comparator::LessOrEqual,
+                        value: mid_range.end,
+                        consequence: Some(Box::new(Self::Yes)), // mid.start <= c <= mid.end
+                        alternative: Self::from_ranges(&ranges[(mid + 1)..]).map(Box::new), // c > mid.end: only the ranges after `mid` are consulted
+                    })),
+                })
+            }
+        }
+    }
+
+    #[cfg(test)]
+    fn contains(&self, c: char) -> bool { // test-only: evaluates the tree for `c` by walking the comparisons
+        match self {
+            Self::Yes => true,
+            Self::Compare {
+                value,
+                operator,
+                alternative,
+                consequence,
+            } => {
+                let condition = match operator {
+                    Comparator::Less => c < *value,
+                    Comparator::LessOrEqual => c <= *value,
+                    Comparator::Equal => c == *value,
+                    Comparator::GreaterOrEqual => c >= *value,
+                };
+                if condition { consequence } else { alternative } // pick the branch selected by the comparison
+                    .as_ref()
+                    .map_or(false, |a| a.contains(c)) // an absent branch means `c` is not in the set
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_character_tree_simple() { // checks membership for every range and for the gaps between them
+        let tree = CharacterTree::from_ranges(&['a'..'d', 'h'..'l', 'p'..'r', 'u'..'u', 'z'..'z'])
+            .unwrap(); // the input is non-empty, so `from_ranges` returns `Some`
+
+        assert!(tree.contains('a')); // 'a'..'d': both bounds are members (the end is inclusive)
+        assert!(tree.contains('b'));
+        assert!(tree.contains('c'));
+        assert!(tree.contains('d'));
+
+        assert!(!tree.contains('e')); // gap between 'd' and 'h'
+        assert!(!tree.contains('f'));
+        assert!(!tree.contains('g'));
+
+        assert!(tree.contains('h'));
+        assert!(tree.contains('i'));
+        assert!(tree.contains('j'));
+        assert!(tree.contains('k'));
+        assert!(tree.contains('l'));
+
+        assert!(!tree.contains('m')); // gap between 'l' and 'p'
+        assert!(!tree.contains('n'));
+        assert!(!tree.contains('o'));
+
+        assert!(tree.contains('p'));
+        assert!(tree.contains('q'));
+        assert!(tree.contains('r'));
+
+        assert!(!tree.contains('s')); // gap between 'r' and 'u' is 's' and 't'
+        assert!(!tree.contains('t')); // was a duplicate check of 's', leaving 't' untested
+
+        assert!(tree.contains('u')); // single-character range 'u'..'u'
+
+        assert!(!tree.contains('v'));
+    }
+}
--- a/crates/generate/src/dedup.rs
+++ b/crates/generate/src/dedup.rs
@ -3,7 +3,7 @@ pub fn split_state_id_groups<S>(
    state_ids_by_group_id: &mut Vec<Vec<usize>>,
    group_ids_by_state_id: &mut [usize],
    start_group_id: usize,
-    mut should_split: impl FnMut(&S, &S, &[usize]) -> bool,
+    mut f: impl FnMut(&S, &S, &[usize]) -> bool,
 ) -> bool {
    let mut result = false;

@ -33,7 +33,7 @@ pub fn split_state_id_groups<S>(
                }
                let right_state = &states[right_state_id];

-                if should_split(left_state, right_state, group_ids_by_state_id) {
+                if f(left_state, right_state, group_ids_by_state_id) {
                    split_state_ids.push(right_state_id);
                }

--- a/crates/generate/src/dsl.js
+++ b/crates/generate/src/dsl.js
@ -16,7 +16,6 @@ function alias(rule, value) {
      result.value = value.symbol.name;
      return result;
    case Object:
-    case GrammarSymbol:
      if (typeof value.type === 'string' && value.type === 'SYMBOL') {
        result.named = true;
        result.value = value.name;
@ -24,7 +23,7 @@ function alias(rule, value) {
      }
  }

-  throw new Error(`Invalid alias value ${value}`);
+  throw new Error('Invalid alias value ' + value);
 }

 function blank() {
@ -36,7 +35,7 @@ function blank() {
 function field(name, rule) {
  return {
    type: "FIELD",
-    name,
+    name: name,
    content: normalize(rule)
  }
 }
@ -49,14 +48,13 @@ function choice(...elements) {
 }

 function optional(value) {
-  checkArguments(arguments, arguments.length, optional, 'optional');
+  checkArguments(arguments.length, optional, 'optional');
  return choice(value, blank());
 }

 function prec(number, rule) {
  checkPrecedence(number);
  checkArguments(
-    arguments,
    arguments.length - 1,
    prec,
    'prec',
@ -70,7 +68,7 @@ function prec(number, rule) {
  };
 }

-prec.left = function (number, rule) {
+prec.left = function(number, rule) {
  if (rule == null) {
    rule = number;
    number = 0;
@ -78,7 +76,6 @@ prec.left = function (number, rule) {

  checkPrecedence(number);
  checkArguments(
-    arguments,
    arguments.length - 1,
    prec.left,
    'prec.left',
@ -92,7 +89,7 @@ prec.left = function (number, rule) {
  };
 }

-prec.right = function (number, rule) {
+prec.right = function(number, rule) {
  if (rule == null) {
    rule = number;
    number = 0;
@ -100,7 +97,6 @@ prec.right = function (number, rule) {

  checkPrecedence(number);
  checkArguments(
-    arguments,
    arguments.length - 1,
    prec.right,
    'prec.right',
@ -114,10 +110,9 @@ prec.right = function (number, rule) {
  };
 }

-prec.dynamic = function (number, rule) {
+prec.dynamic = function(number, rule) {
  checkPrecedence(number);
  checkArguments(
-    arguments,
    arguments.length - 1,
    prec.dynamic,
    'prec.dynamic',
@ -132,7 +127,7 @@ prec.dynamic = function (number, rule) {
 }

 function repeat(rule) {
-  checkArguments(arguments, arguments.length, repeat, 'repeat');
+  checkArguments(arguments.length, repeat, 'repeat');
  return {
    type: "REPEAT",
    content: normalize(rule)
@ -140,7 +135,7 @@ function repeat(rule) {
 }

 function repeat1(rule) {
-  checkArguments(arguments, arguments.length, repeat1, 'repeat1');
+  checkArguments(arguments.length, repeat1, 'repeat1');
  return {
    type: "REPEAT1",
    content: normalize(rule)
@ -154,38 +149,21 @@ function seq(...elements) {
  };
 }

-class GrammarSymbol {
-  constructor(name) {
-    this.type = "SYMBOL";
-    this.name = name;
-  }
-}
-
-function reserved(wordset, rule) {
-  if (typeof wordset !== 'string') {
-    throw new Error('Invalid reserved word set name: ' + wordset)
-  }
-  return {
-    type: "RESERVED",
-    content: normalize(rule),
-    context_name: wordset,
-  }
-}
-
 function sym(name) {
-  return new GrammarSymbol(name);
+  return {
+    type: "SYMBOL",
+    name: name
+  };
 }

 function token(value) {
-  checkArguments(arguments, arguments.length, token, 'token', '', 'literal');
  return {
    type: "TOKEN",
    content: normalize(value)
  };
 }

-token.immediate = function (value) {
-  checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal');
+token.immediate = function(value) {
  return {
    type: "IMMEDIATE_TOKEN",
    content: normalize(value)
@ -211,28 +189,23 @@ function normalize(value) {
        type: 'PATTERN',
        value: value.source
      };
-    case RustRegex:
-      return {
-        type: 'PATTERN',
-        value: value.value
-      };
    case ReferenceError:
      throw value
    default:
      if (typeof value.type === 'string') {
        return value;
      } else {
-        throw new TypeError(`Invalid rule: ${value}`);
+        throw new TypeError("Invalid rule: " + value.toString());
      }
  }
 }

 function RuleBuilder(ruleMap) {
  return new Proxy({}, {
-    get(_, propertyName) {
+    get(target, propertyName) {
      const symbol = sym(propertyName);

-      if (!ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName)) {
+      if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) {
        return symbol;
      } else {
        const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
@ -244,8 +217,6 @@ function RuleBuilder(ruleMap) {
 }

 function grammar(baseGrammar, options) {
-  let inherits = undefined;
-
  if (!options) {
    options = baseGrammar;
    baseGrammar = {
@ -257,11 +228,9 @@ function grammar(baseGrammar, options) {
      inline: [],
      supertypes: [],
      precedences: [],
-      reserved: {},
    };
  } else {
    baseGrammar = baseGrammar.grammar;
-    inherits = baseGrammar.name;
  }

  let externals = baseGrammar.externals;
@ -281,10 +250,10 @@ function grammar(baseGrammar, options) {
  }

  const ruleMap = {};
-  for (const key of Object.keys(options.rules)) {
+  for (const key in options.rules) {
    ruleMap[key] = true;
  }
-  for (const key of Object.keys(baseGrammar.rules)) {
+  for (const key in baseGrammar.rules) {
    ruleMap[key] = true;
  }
  for (const external of externals) {
@ -304,52 +273,18 @@ function grammar(baseGrammar, options) {
    throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
  }

-  if (inherits && typeof inherits !== "string") {
-    throw new Error("Base grammar's 'name' property must be a string.");
-  }
-
-  if (inherits && !/^[a-zA-Z_]\w*$/.test(name)) {
-    throw new Error("Base grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
-  }
-
-  const rules = Object.assign({}, baseGrammar.rules);
+  let rules = Object.assign({}, baseGrammar.rules);
  if (options.rules) {
    if (typeof options.rules !== "object") {
      throw new Error("Grammar's 'rules' property must be an object.");
    }

-    for (const ruleName of Object.keys(options.rules)) {
+    for (const ruleName in options.rules) {
      const ruleFn = options.rules[ruleName];
      if (typeof ruleFn !== "function") {
-        throw new Error(`Grammar rules must all be functions. '${ruleName}' rule is not.`);
+        throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
      }
-      const rule = ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]);
-      if (rule === undefined) {
-        throw new Error(`Rule '${ruleName}' returned undefined.`);
-      }
-      rules[ruleName] = normalize(rule);
-    }
-  }
-
-  let reserved = baseGrammar.reserved;
-  if (options.reserved) {
-    if (typeof options.reserved !== "object") {
-      throw new Error("Grammar's 'reserved' property must be an object.");
-    }
-
-    for (const reservedWordSetName of Object.keys(options.reserved)) {
-      const reservedWordSetFn = options.reserved[reservedWordSetName]
-      if (typeof reservedWordSetFn !== "function") {
-        throw new Error(`Grammar reserved word sets must all be functions. '${reservedWordSetName}' is not.`);
-      }
-
-      const reservedTokens = reservedWordSetFn.call(ruleBuilder, ruleBuilder, baseGrammar.reserved[reservedWordSetName]);
-
-      if (!Array.isArray(reservedTokens)) {
-        throw new Error(`Grammar's reserved word set functions must all return arrays of rules. '${reservedWordSetName}' does not.`);
-      }
-
-      reserved[reservedWordSetName] = reservedTokens.map(normalize);
+      rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
    }
  }

@ -442,12 +377,7 @@ function grammar(baseGrammar, options) {
      throw new Error("Grammar's supertypes must be an array of rules.");
    }

-    supertypes = supertypeRules.map(symbol => {
-      if (symbol.name === 'ReferenceError') {
-        throw new Error(`Supertype rule \`${symbol.symbol.name}\` is not defined.`);
-      }
-      return symbol.name;
-    });
+    supertypes = supertypeRules.map(symbol => symbol.name);
  }

  let precedences = baseGrammar.precedences;
@ -467,43 +397,18 @@ function grammar(baseGrammar, options) {
    });
  }

-  if (Object.keys(rules).length === 0) {
+  if (Object.keys(rules).length == 0) {
    throw new Error("Grammar must have at least one rule.");
  }

-  return {
-    grammar: {
-      name,
-      inherits,
-      word,
-      rules,
-      extras,
-      conflicts,
-      precedences,
-      externals,
-      inline,
-      supertypes,
-      reserved,
-    },
-  };
+  return { grammar: { name, word, rules, extras, conflicts, precedences, externals, inline, supertypes } };
 }

-class RustRegex {
-  constructor(value) {
-    this.value = value;
-  }
-}
-
-function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') {
-  // Allow for .map() usage where additional arguments are index and the entire array.
-  const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]);
-  if (isMapCall) {
-    ruleCount = typeof args[2] === 'number' ? 1 : args[2].length;
-  }
-  if (ruleCount > 1 && !isMapCall) {
+function checkArguments(ruleCount, caller, callerName, suffix = '') {
+  if (ruleCount > 1) {
    const error = new Error([
-      `The \`${callerName}\` function only takes one ${argType} argument${suffix}.`,
-      `You passed in multiple ${argType}s. Did you mean to call \`seq\`?\n`
+      `The \`${callerName}\` function only takes one rule argument${suffix}.`,
+      'You passed multiple rules. Did you mean to call `seq`?\n'
    ].join('\n'));
    Error.captureStackTrace(error, caller);
    throw error
@ -516,48 +421,18 @@ function checkPrecedence(value) {
  }
 }

-function getEnv(name) {
-  if (globalThis.native) return globalThis.__ts_grammar_path;
-  if (globalThis.process) return process.env[name]; // Node/Bun
-  if (globalThis.Deno) return Deno.env.get(name); // Deno
-  throw Error("Unsupported JS runtime");
-}
+global.alias = alias;
+global.blank = blank;
+global.choice = choice;
+global.optional = optional;
+global.prec = prec;
+global.repeat = repeat;
+global.repeat1 = repeat1;
+global.seq = seq;
+global.sym = sym;
+global.token = token;
+global.grammar = grammar;
+global.field = field;

-globalThis.alias = alias;
-globalThis.blank = blank;
-globalThis.choice = choice;
-globalThis.optional = optional;
-globalThis.prec = prec;
-globalThis.repeat = repeat;
-globalThis.repeat1 = repeat1;
-globalThis.reserved = reserved;
-globalThis.seq = seq;
-globalThis.sym = sym;
-globalThis.token = token;
-globalThis.grammar = grammar;
-globalThis.field = field;
-globalThis.RustRegex = RustRegex;
-
-const grammarPath = getEnv("TREE_SITTER_GRAMMAR_PATH");
-let result = await import(grammarPath);
-let grammarObj = result.default?.grammar ?? result.grammar;
-
-if (globalThis.native && !grammarObj) {
-  grammarObj = module.exports.grammar;
-}
-
-const object = {
-  "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
-  ...grammarObj,
-};
-const output = JSON.stringify(object);
-
-if (globalThis.native) {
-  globalThis.output = output;
-} else if (globalThis.process) { // Node/Bun
-  process.stdout.write(output);
-} else if (globalThis.Deno) { // Deno
-  Deno.stdout.writeSync(new TextEncoder().encode(output));
-} else {
-  throw Error("Unsupported JS runtime");
-}
+const result = require(process.env.TREE_SITTER_GRAMMAR_PATH);
+process.stdout.write(JSON.stringify(result.grammar, null, null));
--- a/docs/src/assets/schemas/grammar.schema.json
+++ b/docs/src/assets/schemas/grammar.schema.json
@ -1,6 +1,6 @@
 {
  "$schema": "http://json-schema.org/draft-07/schema#",
-  "title": "Tree-sitter grammar specification",
+  "title": "tree-sitter grammar specification",
  "type": "object",

  "required": ["name", "rules"],
@ -8,18 +8,8 @@
  "additionalProperties": false,

  "properties": {
-    "$schema": {
-      "type": "string"
-    },
-
    "name": {
-      "description": "The name of the grammar",
-      "type": "string",
-      "pattern": "^[a-zA-Z_]\\w*"
-    },
-
-    "inherits": {
-      "description": "The name of the parent grammar",
+      "description": "the name of the grammar",
      "type": "string",
      "pattern": "^[a-zA-Z_]\\w*"
    },
@ -36,7 +26,6 @@

    "extras": {
      "type": "array",
-      "uniqueItems": true,
      "items": {
        "$ref": "#/definitions/rule"
      }
@ -44,36 +33,16 @@

    "precedences": {
      "type": "array",
-      "uniqueItems": true,
      "items": {
        "type": "array",
-        "uniqueItems": true,
        "items": {
-          "oneOf": [
-            { "type": "string" },
-            { "$ref": "#/definitions/symbol-rule" }
-          ]
+          "$ref": "#/definitions/rule"
        }
      }
    },

-    "reserved": {
-      "type": "object",
-      "patternProperties": {
-        "^[a-zA-Z_]\\w*$": {
-          "type": "array",
-          "uniqueItems": true,
-          "items": {
-            "$ref": "#/definitions/rule"
-          }
-        }
-      },
-      "additionalProperties": false
-    },
-
    "externals": {
      "type": "array",
-      "uniqueItems": true,
      "items": {
        "$ref": "#/definitions/rule"
      }
@ -81,7 +50,6 @@

    "inline": {
      "type": "array",
-      "uniqueItems": true,
      "items": {
        "type": "string",
        "pattern": "^[a-zA-Z_]\\w*$"
@ -90,10 +58,8 @@

    "conflicts": {
      "type": "array",
-      "uniqueItems": true,
      "items": {
        "type": "array",
-        "uniqueItems": true,
        "items": {
          "type": "string",
          "pattern": "^[a-zA-Z_]\\w*$"
@ -107,11 +73,10 @@
    },

    "supertypes": {
-      "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.",
+      "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
      "type": "array",
-      "uniqueItems": true,
      "items": {
-        "description": "The name of a rule in `rules` or `extras`",
+        "description": "the name of a rule in `rules` or `extras`",
        "type": "string"
      }
    }
@ -123,7 +88,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "BLANK"
+          "pattern": "^BLANK$"
        }
      },
      "required": ["type"]
@ -134,7 +99,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "STRING"
+          "pattern": "^STRING$"
        },
        "value": {
          "type": "string"
@ -148,10 +113,9 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "PATTERN"
+          "pattern": "^PATTERN$"
        },
-        "value": { "type": "string" },
-        "flags": { "type": "string" }
+        "value": { "type": "string" }
      },
      "required": ["type", "value"]
    },
@ -161,7 +125,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "SYMBOL"
+          "pattern": "^SYMBOL$"
        },
        "name": { "type": "string" }
      },
@ -173,7 +137,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "SEQ"
+          "pattern": "^SEQ$"
        },
        "members": {
          "type": "array",
@ -190,7 +154,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "CHOICE"
+          "pattern": "^CHOICE$"
        },
        "members": {
          "type": "array",
@ -207,10 +171,14 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "ALIAS"
+          "pattern": "^ALIAS$"
+        },
+        "value": {
+          "type": "string"
+        },
+        "named": {
+          "type": "boolean"
        },
-        "value": { "type": "string" },
-        "named": { "type": "boolean" },
        "content": {
          "$ref": "#/definitions/rule"
        }
@ -223,7 +191,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "REPEAT"
+          "pattern": "^REPEAT$"
        },
        "content": {
          "$ref": "#/definitions/rule"
@ -237,7 +205,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "const": "REPEAT1"
+          "pattern": "^REPEAT1$"
        },
        "content": {
          "$ref": "#/definitions/rule"
@ -246,30 +214,12 @@
      "required": ["type", "content"]
    },

-    "reserved-rule": {
-      "type": "object",
-      "properties": {
-        "type": {
-          "type": "string",
-          "const": "RESERVED"
-        },
-        "context_name": { "type": "string" },
-        "content": {
-          "$ref": "#/definitions/rule"
-        }
-      },
-      "required": ["type", "context_name", "content"]
-    },
-
    "token-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
-          "enum": [
-            "TOKEN",
-            "IMMEDIATE_TOKEN"
-          ]
+          "pattern": "^(TOKEN|IMMEDIATE_TOKEN)$"
        },
        "content": {
          "$ref": "#/definitions/rule"
@ -283,7 +233,7 @@
        "name": { "type": "string" },
        "type": {
          "type": "string",
-          "const": "FIELD"
+          "pattern": "^FIELD$"
        },
        "content": {
          "$ref": "#/definitions/rule"
@ -297,12 +247,7 @@
      "properties": {
        "type": {
          "type": "string",
-          "enum": [
-            "PREC",
-            "PREC_LEFT",
-            "PREC_RIGHT",
-            "PREC_DYNAMIC"
-          ]
+          "pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$"
        },
        "value": {
          "oneof": [
@ -328,7 +273,6 @@
        { "$ref": "#/definitions/choice-rule" },
        { "$ref": "#/definitions/repeat1-rule" },
        { "$ref": "#/definitions/repeat-rule" },
-        { "$ref": "#/definitions/reserved-rule" },
        { "$ref": "#/definitions/token-rule" },
        { "$ref": "#/definitions/field-rule" },
        { "$ref": "#/definitions/prec-rule" }
--- a/crates/generate/src/grammars.rs
+++ b/crates/generate/src/grammars.rs
@ -1,9 +1,7 @@
-use std::{collections::HashMap, fmt};
-
-use super::{
-    nfa::Nfa,
-    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
-};
+use super::nfa::Nfa;
+use super::rules::{Alias, Associativity, Precedence, Rule, Symbol};
+use std::collections::HashMap;
+use std::fmt;

 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum VariableType {
@ -39,13 +37,6 @@ pub struct InputGrammar {
    pub variables_to_inline: Vec<String>,
    pub supertype_symbols: Vec<String>,
    pub word_token: Option<String>,
-    pub reserved_words: Vec<ReservedWordContext<Rule>>,
-}
-
-#[derive(Debug, Default, PartialEq, Eq)]
-pub struct ReservedWordContext<T> {
-    pub name: String,
-    pub reserved_words: Vec<T>,
 }

 // Extracted lexical grammar
@ -73,20 +64,8 @@ pub struct ProductionStep {
    pub associativity: Option<Associativity>,
    pub alias: Option<Alias>,
    pub field_name: Option<String>,
-    pub reserved_word_set_id: ReservedWordSetId,
 }

-#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub struct ReservedWordSetId(pub usize);
-
-impl fmt::Display for ReservedWordSetId {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        self.0.fmt(f)
-    }
-}
-
-pub const NO_RESERVED_WORDS: ReservedWordSetId = ReservedWordSetId(usize::MAX);
-
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Production {
    pub steps: Vec<ProductionStep>,
@ -123,44 +102,50 @@ pub struct SyntaxGrammar {
    pub variables_to_inline: Vec<Symbol>,
    pub word_token: Option<Symbol>,
    pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
-    pub reserved_word_sets: Vec<TokenSet>,
 }

 #[cfg(test)]
 impl ProductionStep {
-    #[must_use]
-    pub fn new(symbol: Symbol) -> Self {
+    pub const fn new(symbol: Symbol) -> Self {
        Self {
            symbol,
            precedence: Precedence::None,
            associativity: None,
            alias: None,
            field_name: None,
-            reserved_word_set_id: ReservedWordSetId::default(),
        }
    }

-    pub fn with_prec(
-        mut self,
-        precedence: Precedence,
-        associativity: Option<Associativity>,
-    ) -> Self {
-        self.precedence = precedence;
-        self.associativity = associativity;
-        self
+    pub fn with_prec(self, precedence: Precedence, associativity: Option<Associativity>) -> Self {
+        Self {
+            symbol: self.symbol,
+            precedence,
+            associativity,
+            alias: self.alias,
+            field_name: self.field_name,
+        }
    }

-    pub fn with_alias(mut self, value: &str, is_named: bool) -> Self {
-        self.alias = Some(Alias {
-            value: value.to_string(),
-            is_named,
-        });
-        self
+    pub fn with_alias(self, value: &str, is_named: bool) -> Self {
+        Self {
+            symbol: self.symbol,
+            precedence: self.precedence,
+            associativity: self.associativity,
+            alias: Some(Alias {
+                value: value.to_string(),
+                is_named,
+            }),
+            field_name: self.field_name,
+        }
    }
-
-    pub fn with_field_name(mut self, name: &str) -> Self {
-        self.field_name = Some(name.to_string());
-        self
+    pub fn with_field_name(self, name: &str) -> Self {
+        Self {
+            symbol: self.symbol,
+            precedence: self.precedence,
+            associativity: self.associativity,
+            alias: self.alias,
+            field_name: Some(name.to_string()),
+        }
    }
 }

@ -253,7 +238,7 @@ impl InlinedProductionMap {
        step_index: u32,
    ) -> Option<impl Iterator<Item = &'a Production> + 'a> {
        self.production_map
-            .get(&(std::ptr::from_ref::<Production>(production), step_index))
+            .get(&(production as *const Production, step_index))
            .map(|production_indices| {
                production_indices
                    .iter()
--- a/cli/src/generate/mod.rs
+++ b/cli/src/generate/mod.rs
@ -0,0 +1,291 @@
+mod binding_files;
+mod build_tables;
+mod char_tree;
+mod dedup;
+mod grammars;
+mod nfa;
+mod node_types;
+pub mod parse_grammar;
+mod prepare_grammar;
+mod render;
+mod rules;
+mod tables;
+
+use std::io::Write;
+use std::path::Path;
+use std::process::{Command, Stdio};
+use std::{env, fs};
+
+use anyhow::{anyhow, Context, Result};
+use lazy_static::lazy_static;
+use regex::{Regex, RegexBuilder};
+use semver::Version;
+use serde::Deserialize;
+
+use self::build_tables::build_tables;
+use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
+use self::parse_grammar::parse_grammar;
+use self::prepare_grammar::prepare_grammar;
+use self::render::render_c_code;
+use self::rules::AliasMap;
+
+// Matches a `//` comment that occupies a whole line (optionally preceded by
+// whitespace). Multi-line mode makes `^` anchor at the start of every line.
+lazy_static! {
+    static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
+        .multi_line(true)
+        .build()
+        .unwrap();
+}
+
+/// The artifacts produced by one run of the parser generator.
+struct GeneratedParser {
+    /// Rendered C source of the parser.
+    c_code: String,
+    /// Pretty-printed JSON describing the grammar's node types.
+    node_types_json: String,
+}
+
+/// Generates a parser for the grammar in `repo_path` and writes the results to disk.
+///
+/// The grammar is loaded from `grammar_path` when one is given, otherwise from
+/// `grammar.js` inside `repo_path`; in the latter case the loaded grammar is also
+/// saved as `src/grammar.json`. `parser.c` and `node-types.json` are written to
+/// `src`, and `parser.h` to `src/tree_sitter`. Binding files are generated when
+/// `generate_bindings` is set.
+///
+/// The grammar's semantic version is taken from `package.json` and must equal the
+/// version in `Cargo.toml`.
+/// NOTE(review): both files are opened by bare relative path, i.e. relative to the
+/// process's working directory rather than `repo_path` — confirm this is intended.
+///
+/// # Errors
+///
+/// Returns an error if the grammar cannot be loaded, parsed or prepared, if either
+/// version cannot be read, if the two versions differ, if a version component does
+/// not fit in a `u8`, or if any output file cannot be written.
+pub fn generate_parser_in_directory(
+    repo_path: &Path,
+    grammar_path: Option<&str>,
+    abi_version: usize,
+    generate_bindings: bool,
+    report_symbol_name: Option<&str>,
+    js_runtime: Option<&str>,
+) -> Result<()> {
+    let src_path = repo_path.join("src");
+    let header_path = src_path.join("tree_sitter");
+
+    // Read the grammar.json.
+    let grammar_json = match grammar_path {
+        Some(path) => load_grammar_file(path.as_ref(), js_runtime)?,
+        None => load_grammar_file(&repo_path.join("grammar.js"), js_runtime)?,
+    };
+
+    // Ensure that the output directories exist.
+    fs::create_dir_all(&src_path)?;
+    fs::create_dir_all(&header_path)?;
+
+    if grammar_path.is_none() {
+        fs::write(src_path.join("grammar.json"), &grammar_json)
+            .with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?;
+    }
+
+    // Parse and preprocess the grammar.
+    let input_grammar = parse_grammar(&grammar_json)?;
+    let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
+        prepare_grammar(&input_grammar)?;
+    let language_name = input_grammar.name;
+
+    let language_semver = read_package_json_version()?;
+    let rust_binding_version = read_rust_binding_version()?;
+    if language_semver != rust_binding_version {
+        anyhow::bail!(
+            "Error:
+            The version of your language grammar in `package.json` is `{language_semver}`, but the version of your language grammar in `Cargo.toml` is `{rust_binding_version}`.
+            These versions must match. Please adjust one of these files to match the other, and then try running `tree-sitter generate` again.
+
+            Consider delegating this process to the `release` subcommand, which will handle git tags, GitHub releases, and publishing to crates.io, npmjs, and PyPI for you.
+            Read more here: https://tree-sitter.github.io/tree-sitter/creating-parsers#releasing-a-new-grammar-version",
+        );
+    }
+
+    // Each version component is passed on as a `u8`. Convert with a range check
+    // instead of an `as` cast, which would silently wrap values above 255.
+    let version_component = |name: &str, value: u64| {
+        u8::try_from(value).with_context(|| {
+            format!("The {name} version component `{value}` of the grammar does not fit in 8 bits")
+        })
+    };
+    let semantic_version = (
+        version_component("major", language_semver.major)?,
+        version_component("minor", language_semver.minor)?,
+        version_component("patch", language_semver.patch)?,
+    );
+
+    // Generate the parser and related files.
+    let GeneratedParser {
+        c_code,
+        node_types_json,
+    } = generate_parser_for_grammar_with_opts(
+        &language_name,
+        syntax_grammar,
+        lexical_grammar,
+        &inlines,
+        simple_aliases,
+        abi_version,
+        report_symbol_name,
+        semantic_version,
+    )?;
+
+    write_file(&src_path.join("parser.c"), c_code)?;
+    write_file(&src_path.join("node-types.json"), node_types_json)?;
+    write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
+
+    if generate_bindings {
+        binding_files::generate_binding_files(repo_path, &language_name)?;
+    }
+
+    Ok(())
+}
+
+/// Generates a parser from grammar JSON text and returns `(grammar name, C source)`.
+///
+/// Whole-line `//` comments are replaced with a newline before parsing. The parser
+/// is generated for `tree_sitter::LANGUAGE_VERSION`, without a report symbol, and
+/// with a semantic version of `0.0.0`.
+pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
+    let stripped_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
+    let grammar = parse_grammar(&stripped_json)?;
+    let (syntax, lexical, inlined_productions, aliases) = prepare_grammar(&grammar)?;
+
+    // Only the C source is returned; the node types JSON is discarded.
+    let GeneratedParser { c_code, .. } = generate_parser_for_grammar_with_opts(
+        &grammar.name,
+        syntax,
+        lexical,
+        &inlined_productions,
+        aliases,
+        tree_sitter::LANGUAGE_VERSION,
+        None,
+        (0, 0, 0),
+    )?;
+
+    Ok((grammar.name, c_code))
+}
+
+/// Runs the generation pipeline on an already-prepared grammar.
+///
+/// Computes the variable info and node types, builds the parse and lex tables, and
+/// renders the C source for the parser named `name`. `abi_version` and
+/// `semantic_version` are forwarded to the renderer; `report_symbol_name` is
+/// forwarded to table construction.
+///
+/// # Errors
+///
+/// Returns an error if the variable info cannot be computed, if table construction
+/// fails, or if the node types cannot be serialized to JSON.
+fn generate_parser_for_grammar_with_opts(
+    name: &str,
+    syntax_grammar: SyntaxGrammar,
+    lexical_grammar: LexicalGrammar,
+    inlines: &InlinedProductionMap,
+    simple_aliases: AliasMap,
+    abi_version: usize,
+    report_symbol_name: Option<&str>,
+    semantic_version: (u8, u8, u8),
+) -> Result<GeneratedParser> {
+    let variable_info =
+        node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
+    let node_types_json = node_types::generate_node_types_json(
+        &syntax_grammar,
+        &lexical_grammar,
+        &simple_aliases,
+        &variable_info,
+    );
+    let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
+        &syntax_grammar,
+        &lexical_grammar,
+        &simple_aliases,
+        &variable_info,
+        inlines,
+        report_symbol_name,
+    )?;
+    // The renderer takes ownership of the grammars, so it must run after every
+    // step above that only borrows them.
+    let c_code = render_c_code(
+        name,
+        parse_table,
+        main_lex_table,
+        keyword_lex_table,
+        keyword_capture_token,
+        syntax_grammar,
+        lexical_grammar,
+        simple_aliases,
+        abi_version,
+        semantic_version,
+    );
+    // Propagate a serialization failure to the caller instead of panicking.
+    let node_types_json = serde_json::to_string_pretty(&node_types_json)
+        .with_context(|| "Failed to serialize node types JSON")?;
+    Ok(GeneratedParser {
+        c_code,
+        node_types_json,
+    })
+}
+
+/// Reads the `version` field of `package.json` and parses it as a semantic version.
+///
+/// The file is opened by the bare relative path `package.json`.
+fn read_package_json_version() -> Result<Version> {
+    // Only the `version` key is needed from the manifest.
+    #[derive(Deserialize)]
+    struct PackageJSON {
+        version: String,
+    }
+
+    let path = "package.json";
+
+    let contents = match fs::read_to_string(path) {
+        Ok(contents) => contents,
+        Err(error) => return Err(error).with_context(|| format!("Failed to read {path:?}")),
+    };
+
+    let PackageJSON { version } = serde_json::from_str::<PackageJSON>(&contents)
+        .with_context(|| format!("Failed to parse {path:?} as JSON"))?;
+
+    let parsed = Version::parse(&version)?;
+    Ok(parsed)
+}
+
+/// Reads `package.version` from `Cargo.toml` and parses it as a semantic version.
+///
+/// The file is opened by the bare relative path `Cargo.toml`.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be read or is not valid TOML, if
+/// `package.version` is missing or is not a string, or if it is not a valid
+/// semantic version.
+fn read_rust_binding_version() -> Result<Version> {
+    let path = "Cargo.toml";
+    let text = fs::read_to_string(path).with_context(|| format!("Failed to read {path:?}"))?;
+    let cargo_toml = toml::from_str::<toml::Value>(text.as_ref())
+        .with_context(|| format!("Failed to parse {path:?} as TOML"))?;
+
+    // Indexing a `toml::Value` with `[...]` panics on a missing key, so look the
+    // keys up fallibly and report a proper error instead.
+    let version = cargo_toml
+        .get("package")
+        .and_then(|package| package.get("version"))
+        .and_then(|version| version.as_str())
+        .with_context(|| format!("Missing string value `package.version` in {path:?}"))?;
+
+    Version::parse(version)
+        .with_context(|| format!("Invalid `package.version` {version:?} in {path:?}"))
+}
+
+/// Loads a grammar from a `.js` or `.json` file and returns it as JSON text.
+///
+/// A `.js` grammar is evaluated through `load_js_grammar_file` using `js_runtime`;
+/// a `.json` grammar is read verbatim. Directories and any other extension are
+/// rejected with an error.
+pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
+    if grammar_path.is_dir() {
+        return Err(anyhow!(
+            "Path to a grammar file with `.js` or `.json` extension is required"
+        ));
+    }
+
+    let extension = grammar_path.extension().and_then(|e| e.to_str());
+
+    if extension == Some("js") {
+        return load_js_grammar_file(grammar_path, js_runtime)
+            .with_context(|| "Failed to load grammar.js");
+    }
+
+    if extension == Some("json") {
+        return fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json");
+    }
+
+    Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",))
+}
+
+/// Evaluates a JavaScript grammar file and returns the resulting grammar as
+/// pretty-printed JSON followed by a newline.
+///
+/// Spawns `js_runtime` (default `node`) with `TREE_SITTER_GRAMMAR_PATH` set to the
+/// canonicalized `grammar_path`, then feeds it the CLI version globals followed by
+/// the embedded `dsl.js` on stdin. On a zero exit status, the text after the last
+/// newline of the child's stdout is taken as the grammar JSON; everything before
+/// that newline is echoed to this process's stdout.
+///
+/// # Errors
+///
+/// Returns an error if the path cannot be canonicalized, if the runtime cannot be
+/// spawned or written to, if it is terminated without an exit code or exits with a
+/// non-zero status, or if its output is not valid UTF-8 or not valid JSON.
+fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
+    let grammar_path = fs::canonicalize(grammar_path)?;
+
+    let js_runtime = js_runtime.unwrap_or("node");
+
+    let mut node_process = Command::new(js_runtime)
+        .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()
+        .with_context(|| format!("Failed to run `{js_runtime}`"))?;
+
+    let mut node_stdin = node_process
+        .stdin
+        .take()
+        .with_context(|| "Failed to open stdin for node")?;
+    let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
+        .with_context(|| "Could not parse this package's version as semver.")?;
+    write!(
+        node_stdin,
+        "global.TREE_SITTER_CLI_VERSION_MAJOR = {};
+        global.TREE_SITTER_CLI_VERSION_MINOR = {};
+        global.TREE_SITTER_CLI_VERSION_PATCH = {};",
+        cli_version.major, cli_version.minor, cli_version.patch,
+    )
+    .with_context(|| "Failed to write tree-sitter version to node's stdin")?;
+    let javascript_code = include_bytes!("./dsl.js");
+    // `write` may accept only part of the buffer; `write_all` keeps writing until
+    // the whole script has been delivered.
+    node_stdin
+        .write_all(javascript_code)
+        .with_context(|| "Failed to write grammar dsl to node's stdin")?;
+    // Close stdin so the runtime sees end-of-input and starts evaluating.
+    drop(node_stdin);
+    let output = node_process
+        .wait_with_output()
+        .with_context(|| "Failed to read output from node")?;
+    match output.status.code() {
+        // No exit code is available (e.g. the child was terminated by a signal).
+        None => Err(anyhow!("Node process was killed")),
+        Some(0) => {
+            let stdout =
+                String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?;
+
+            let mut grammar_json = &stdout[..];
+
+            if let Some(pos) = stdout.rfind('\n') {
+                // If there's a newline, split the last line from the rest of the output
+                let node_output = &stdout[..pos];
+                grammar_json = &stdout[pos + 1..];
+
+                // Forward whatever the grammar script printed before the JSON.
+                let mut stdout = std::io::stdout().lock();
+                stdout.write_all(node_output.as_bytes())?;
+                stdout.write_all(b"\n")?;
+                stdout.flush()?;
+            }
+
+            // Round-trip through `serde_json::Value` to validate and pretty-print.
+            Ok(serde_json::to_string_pretty(
+                &serde_json::from_str::<serde_json::Value>(grammar_json)
+                    .with_context(|| "Failed to parse grammar JSON")?,
+            )
+            .with_context(|| "Failed to serialize grammar JSON")?
+                + "\n")
+        }
+        Some(code) => Err(anyhow!("Node process exited with status {code}")),
+    }
+}
+
+/// Writes `body` to `path`, attaching the file name to any I/O error.
+fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
+    let outcome = fs::write(path, body);
+    // The file name is only looked up (and unwrapped) when the write failed.
+    outcome.with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
+}
--- a/crates/generate/src/nfa.rs
+++ b/crates/generate/src/nfa.rs
@ -1,14 +1,13 @@
-use std::{
-    char,
-    cmp::{max, Ordering},
-    fmt,
-    iter::ExactSizeIterator,
-    mem::{self, swap},
-    ops::{Range, RangeInclusive},
-};
+use std::char;
+use std::cmp::max;
+use std::cmp::Ordering;
+use std::collections::HashSet;
+use std::fmt;
+use std::mem::swap;
+use std::ops::Range;

 /// A set of characters represented as a vector of ranges.
-#[derive(Clone, Default, PartialEq, Eq, Hash)]
+#[derive(Clone, PartialEq, Eq, Hash)]
 pub struct CharacterSet {
    ranges: Vec<Range<u32>>,
 }
@ -58,8 +57,7 @@ impl CharacterSet {

    /// Create a character set with a given *inclusive* range of characters.
    #[allow(clippy::single_range_in_vec_init)]
-    #[cfg(test)]
-    fn from_range(mut first: char, mut last: char) -> Self {
+    pub fn from_range(mut first: char, mut last: char) -> Self {
        if first > last {
            swap(&mut first, &mut last);
        }
@ -116,11 +114,6 @@ impl CharacterSet {
        self
    }

-    pub fn assign(&mut self, other: &Self) {
-        self.ranges.clear();
-        self.ranges.extend_from_slice(&other.ranges);
-    }
-
    fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize {
        while i < self.ranges.len() {
            let range = &mut self.ranges[i];
@ -287,30 +280,17 @@ impl CharacterSet {

    /// Produces a `CharacterSet` containing every character that is in _exactly one_ of `self` or
    /// `other`, but is not present in both sets.
-    #[cfg(test)]
-    fn symmetric_difference(mut self, mut other: Self) -> Self {
+    pub fn symmetric_difference(mut self, mut other: Self) -> Self {
        self.remove_intersection(&mut other);
        self.add(&other)
    }

-    pub fn char_codes(&self) -> impl Iterator<Item = u32> + '_ {
-        self.ranges.iter().flat_map(Clone::clone)
+    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
+        self.ranges.iter().flat_map(std::clone::Clone::clone)
    }

    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
-        self.char_codes().filter_map(char::from_u32)
-    }
-
-    pub fn range_count(&self) -> usize {
-        self.ranges.len()
-    }
-
-    pub fn ranges(&self) -> impl Iterator<Item = RangeInclusive<char>> + '_ {
-        self.ranges.iter().filter_map(|range| {
-            let start = range.clone().find_map(char::from_u32)?;
-            let end = (range.start..range.end).rev().find_map(char::from_u32)?;
-            Some(start..=end)
-        })
+        self.iter().filter_map(char::from_u32)
    }

    pub fn is_empty(&self) -> bool {
@ -319,57 +299,41 @@ impl CharacterSet {

    /// Get a reduced list of character ranges, assuming that a given
    /// set of characters can be safely ignored.
-    pub fn simplify_ignoring(&self, ruled_out_characters: &Self) -> Self {
-        let mut prev_range: Option<Range<u32>> = None;
-        Self {
-            ranges: self
-                .ranges
-                .iter()
-                .map(|range| Some(range.clone()))
-                .chain([None])
-                .filter_map(move |range| {
-                    if let Some(range) = &range {
-                        if ruled_out_characters.contains_codepoint_range(range.clone()) {
-                            return None;
-                        }
-
-                        if let Some(prev_range) = &mut prev_range {
-                            if ruled_out_characters
-                                .contains_codepoint_range(prev_range.end..range.start)
-                            {
-                                prev_range.end = range.end;
-                                return None;
-                            }
+    pub fn simplify_ignoring<'a>(
+        &'a self,
+        ruled_out_characters: &'a HashSet<u32>,
+    ) -> Vec<Range<char>> {
+        let mut prev_range: Option<Range<char>> = None;
+        self.chars()
+            .map(|c| (c, false))
+            .chain(Some(('\0', true)))
+            .filter_map(move |(c, done)| {
+                if done {
+                    return prev_range.clone();
+                }
+                if ruled_out_characters.contains(&(c as u32)) {
+                    return None;
+                }
+                if let Some(range) = prev_range.clone() {
+                    let mut prev_range_successor = range.end as u32 + 1;
+                    while prev_range_successor < c as u32 {
+                        if !ruled_out_characters.contains(&prev_range_successor) {
+                            prev_range = Some(c..c);
+                            return Some(range);
                        }
+                        prev_range_successor += 1;
                    }
-
-                    let result = prev_range.clone();
-                    prev_range = range;
-                    result
-                })
-                .collect(),
-        }
-    }
-
-    pub fn contains_codepoint_range(&self, seek_range: Range<u32>) -> bool {
-        let ix = match self.ranges.binary_search_by(|probe| {
-            if probe.end <= seek_range.start {
-                Ordering::Less
-            } else if probe.start > seek_range.start {
-                Ordering::Greater
-            } else {
-                Ordering::Equal
-            }
-        }) {
-            Ok(ix) | Err(ix) => ix,
-        };
-        self.ranges
-            .get(ix)
-            .is_some_and(|range| range.start <= seek_range.start && range.end >= seek_range.end)
+                    prev_range = Some(range.start..c);
+                } else {
+                    prev_range = Some(c..c);
+                }
+                None
+            })
+            .collect()
    }

    pub fn contains(&self, c: char) -> bool {
-        self.contains_codepoint_range(c as u32..c as u32 + 1)
+        self.ranges.iter().any(|r| r.contains(&(c as u32)))
    }
 }

@ -378,9 +342,15 @@ impl Ord for CharacterSet {
        let count_cmp = self
            .ranges
            .iter()
-            .map(ExactSizeIterator::len)
+            .map(std::iter::ExactSizeIterator::len)
            .sum::<usize>()
-            .cmp(&other.ranges.iter().map(ExactSizeIterator::len).sum());
+            .cmp(
+                &other
+                    .ranges
+                    .iter()
+                    .map(std::iter::ExactSizeIterator::len)
+                    .sum(),
+            );
        if count_cmp != Ordering::Equal {
            return count_cmp;
        }
@ -416,11 +386,11 @@ impl fmt::Debug for CharacterSet {
            write!(f, "^ ")?;
            set = set.negate();
        }
-        for (i, range) in set.ranges().enumerate() {
+        for (i, c) in set.chars().enumerate() {
            if i > 0 {
                write!(f, ", ")?;
            }
-            write!(f, "{range:?}")?;
+            write!(f, "{c:?}")?;
        }
        write!(f, "]")?;
        Ok(())
@ -428,13 +398,11 @@ impl fmt::Debug for CharacterSet {
 }

 impl Nfa {
-    #[must_use]
+    /// Creates an NFA whose `states` vector is empty.
    pub const fn new() -> Self {
        Self { states: Vec::new() }
    }

+    /// Returns the index of the last entry in `states`, as a `u32`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `states` is empty. The explicit check is kept because the
+    /// subtraction below would otherwise wrap around to `u32::MAX` in builds
+    /// without overflow checks, yielding a bogus state id instead of an error.
    pub fn last_state_id(&self) -> u32 {
        assert!(!self.states.is_empty());
        self.states.len() as u32 - 1
    }
 }
@ -496,7 +464,7 @@ impl<'a> NfaCursor<'a> {
    fn group_transitions<'b>(
        iter: impl Iterator<Item = (&'b CharacterSet, bool, i32, u32)>,
    ) -> Vec<NfaTransition> {
-        let mut result = Vec::<NfaTransition>::new();
+        let mut result: Vec<NfaTransition> = Vec::new();
        for (chars, is_sep, prec, state) in iter {
            let mut chars = chars.clone();
            let mut i = 0;
@ -531,25 +499,25 @@ impl<'a> NfaCursor<'a> {
                });
            }
        }
+        result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));

        let mut i = 0;
-        while i < result.len() {
+        'i_loop: while i < result.len() {
            for j in 0..i {
                if result[j].states == result[i].states
                    && result[j].is_separator == result[i].is_separator
                    && result[j].precedence == result[i].precedence
                {
-                    let characters = mem::take(&mut result[j].characters);
+                    let mut characters = CharacterSet::empty();
+                    swap(&mut characters, &mut result[j].characters);
                    result[j].characters = characters.add(&result[i].characters);
                    result.remove(i);
-                    i -= 1;
-                    break;
+                    continue 'i_loop;
                }
            }
            i += 1;
        }

-        result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
        result
    }

@ -831,18 +799,18 @@ mod tests {
                    (CharacterSet::from_char('e'), false, 0, 2),
                ],
                vec![
-                    NfaTransition {
-                        characters: CharacterSet::empty().add_char('b').add_char('e'),
-                        precedence: 0,
-                        states: vec![2],
-                        is_separator: false,
-                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('a').add_range('c', 'd'),
                        precedence: 0,
                        states: vec![1],
                        is_separator: false,
                    },
+                    NfaTransition {
+                        characters: CharacterSet::empty().add_char('b').add_char('e'),
+                        precedence: 0,
+                        states: vec![2],
+                        is_separator: false,
+                    },
                ],
            ),
        ];
@ -950,19 +918,20 @@ mod tests {
            assert_eq!(
                left.remove_intersection(&mut right),
                row.intersection,
-                "row {i}a: {:?} && {:?}",
+                "row {}a: {:?} && {:?}",
+                i,
                row.left,
                row.right
            );
            assert_eq!(
                left, row.left_only,
-                "row {i}a: {:?} - {:?}",
-                row.left, row.right
+                "row {}a: {:?} - {:?}",
+                i, row.left, row.right
            );
            assert_eq!(
                right, row.right_only,
-                "row {i}a: {:?} - {:?}",
-                row.right, row.left
+                "row {}a: {:?} - {:?}",
+                i, row.right, row.left
            );

            let mut left = row.left.clone();
@ -970,25 +939,27 @@ mod tests {
            assert_eq!(
                right.remove_intersection(&mut left),
                row.intersection,
-                "row {i}b: {:?} && {:?}",
+                "row {}b: {:?} && {:?}",
+                i,
                row.left,
                row.right
            );
            assert_eq!(
                left, row.left_only,
-                "row {i}b: {:?} - {:?}",
-                row.left, row.right
+                "row {}b: {:?} - {:?}",
+                i, row.left, row.right
            );
            assert_eq!(
                right, row.right_only,
-                "row {i}b: {:?} - {:?}",
-                row.right, row.left
+                "row {}b: {:?} - {:?}",
+                i, row.right, row.left
            );

            assert_eq!(
                row.left.clone().difference(row.right.clone()),
                row.left_only,
-                "row {i}b: {:?} -- {:?}",
+                "row {}b: {:?} -- {:?}",
+                i,
                row.left,
                row.right
            );
@ -1062,7 +1033,7 @@ mod tests {

    #[test]
    #[allow(clippy::single_range_in_vec_init)]
-    fn test_character_set_simplify_ignoring() {
+    fn test_character_set_get_ranges() {
        struct Row {
            chars: Vec<char>,
            ruled_out_chars: Vec<char>,
@ -1085,21 +1056,6 @@ mod tests {
                ruled_out_chars: vec!['d', 'f', 'g'],
                expected_ranges: vec!['a'..'h', 'z'..'z'],
            },
-            Row {
-                chars: vec!['a', 'b', 'c', 'g', 'h', 'i'],
-                ruled_out_chars: vec!['d', 'j'],
-                expected_ranges: vec!['a'..'c', 'g'..'i'],
-            },
-            Row {
-                chars: vec!['c', 'd', 'e', 'g', 'h'],
-                ruled_out_chars: vec!['a', 'b', 'c', 'd', 'e', 'f'],
-                expected_ranges: vec!['g'..'h'],
-            },
-            Row {
-                chars: vec!['I', 'N'],
-                ruled_out_chars: vec!['A', 'I', 'N', 'Z'],
-                expected_ranges: vec![],
-            },
        ];

        for Row {
@ -1108,23 +1064,13 @@ mod tests {
            expected_ranges,
        } in &table
        {
-            let ruled_out_chars = ruled_out_chars
-                .iter()
-                .fold(CharacterSet::empty(), |set, c| set.add_char(*c));
+            let ruled_out_chars = ruled_out_chars.iter().map(|c: &char| *c as u32).collect();
            let mut set = CharacterSet::empty();
            for c in chars {
                set = set.add_char(*c);
            }
-            let actual = set.simplify_ignoring(&ruled_out_chars);
-            let expected = expected_ranges
-                .iter()
-                .fold(CharacterSet::empty(), |set, range| {
-                    set.add_range(range.start, range.end)
-                });
-            assert_eq!(
-                actual, expected,
-                "chars: {chars:?}, ruled out chars: {ruled_out_chars:?}"
-            );
+            let ranges = set.simplify_ignoring(&ruled_out_chars);
+            assert_eq!(ranges, *expected_ranges);
        }
    }
 }
--- a/crates/generate/src/node_types.rs
+++ b/crates/generate/src/node_types.rs
@ -1,12 +1,9 @@
-use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
-
+use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
+use super::rules::{Alias, AliasMap, Symbol, SymbolType};
+use anyhow::{anyhow, Result};
 use serde::Serialize;
-use thiserror::Error;
-
-use super::{
-    grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
-    rules::{Alias, AliasMap, Symbol, SymbolType},
-};
+use std::cmp::Ordering;
+use std::collections::{BTreeMap, HashMap, HashSet};

 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum ChildType {
@ -29,15 +26,10 @@ pub struct VariableInfo {
 }

 #[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)]
-#[cfg(feature = "load")]
 pub struct NodeInfoJSON {
    #[serde(rename = "type")]
    kind: String,
    named: bool,
-    #[serde(skip_serializing_if = "std::ops::Not::not")]
-    root: bool,
-    #[serde(skip_serializing_if = "std::ops::Not::not")]
-    extra: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    fields: Option<BTreeMap<String, FieldInfoJSON>>,
    #[serde(skip_serializing_if = "Option::is_none")]
@ -47,7 +39,6 @@ pub struct NodeInfoJSON {
 }

 #[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[cfg(feature = "load")]
 pub struct NodeTypeJSON {
    #[serde(rename = "type")]
    kind: String,
@ -55,7 +46,6 @@ pub struct NodeTypeJSON {
 }

 #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
-#[cfg(feature = "load")]
 pub struct FieldInfoJSON {
    multiple: bool,
    required: bool,
@ -69,7 +59,6 @@ pub struct ChildQuantity {
    multiple: bool,
 }

-#[cfg(feature = "load")]
 impl Default for FieldInfoJSON {
    fn default() -> Self {
        Self {
@ -105,7 +94,7 @@ impl ChildQuantity {
        }
    }

-    const fn append(&mut self, other: Self) {
+    fn append(&mut self, other: Self) {
        if other.exists {
            if self.exists || other.multiple {
                self.multiple = true;
@ -117,7 +106,7 @@ impl ChildQuantity {
        }
    }

-    const fn union(&mut self, other: Self) -> bool {
+    fn union(&mut self, other: Self) -> bool {
        let mut result = false;
        if !self.exists && other.exists {
            result = true;
@ -135,14 +124,6 @@ impl ChildQuantity {
    }
 }

-pub type VariableInfoResult<T> = Result<T, VariableInfoError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum VariableInfoError {
-    #[error("Grammar error: Supertype symbols must always have a single visible child, but `{0}` can have multiple")]
-    InvalidSupertype(String),
-}
-
 /// Compute a summary of the public-facing structure of each variable in the
 /// grammar. Each variable in the grammar corresponds to a distinct public-facing
 /// node type.
@ -153,22 +134,23 @@ pub enum VariableInfoError {
 ///    * `types` - The types of visible children the field can contain.
 ///    * `optional` - Do `N` nodes always have this field?
 ///    * `multiple` - Can `N` nodes have multiple children for this field?
-/// 3. `children_without_fields` - The *other* named children of `N` that are not associated with
-///    fields. Data regarding these children:
+/// 3. `children_without_fields` - The *other* named children of `N` that are
+///    not associated with fields. Data regarding these children:
 ///    * `types` - The types of named children with no field.
 ///    * `optional` - Do `N` nodes always have at least one named child with no field?
 ///    * `multiple` - Can `N` nodes have multiple named children with no field?
 ///
 /// Each summary must account for some indirect factors:
-/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible children of `C`
-///    *appear* to be direct children of `N`.
-/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, then nodes which
-///    *appear* to have type `N` may have internal structure based on `M`.
+/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible
+///    children of `C` *appear* to be direct children of `N`.
+/// 2. aliases. If a parent node type `M` is aliased as some other type `N`,
+///    then nodes which *appear* to have type `N` may have internal structure based
+///    on `M`.
 pub fn get_variable_info(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    default_aliases: &AliasMap,
-) -> VariableInfoResult<Vec<VariableInfo>> {
+) -> Result<Vec<VariableInfo>> {
    let child_type_is_visible = |t: &ChildType| {
        variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
    };
@ -236,8 +218,7 @@ pub fn get_variable_info(
                            .entry(field_name)
                            .or_insert_with(ChildQuantity::zero);

-                        // Inherit the types and quantities of hidden children associated with
-                        // fields.
+                        // Inherit the types and quantities of hidden children associated with fields.
                        if child_is_hidden && child_symbol.is_non_terminal() {
                            let child_variable_info = &result[child_symbol.index];
                            did_change |= extend_sorted(
@ -349,7 +330,13 @@ pub fn get_variable_info(
    for supertype_symbol in &syntax_grammar.supertype_symbols {
        if result[supertype_symbol.index].has_multi_step_production {
            let variable = &syntax_grammar.variables[supertype_symbol.index];
-            Err(VariableInfoError::InvalidSupertype(variable.name.clone()))?;
+            return Err(anyhow!(
+                concat!(
+                    "Grammar error: Supertype symbols must always ",
+                    "have a single visible child, but `{}` can have multiple"
+                ),
+                variable.name
+            ));
        }
    }

@ -374,105 +361,12 @@ pub fn get_variable_info(
    Ok(result)
 }

-fn get_aliases_by_symbol(
-    syntax_grammar: &SyntaxGrammar,
-    default_aliases: &AliasMap,
-) -> HashMap<Symbol, BTreeSet<Option<Alias>>> {
-    let mut aliases_by_symbol = HashMap::new();
-    for (symbol, alias) in default_aliases {
-        aliases_by_symbol.insert(*symbol, {
-            let mut aliases = BTreeSet::new();
-            aliases.insert(Some(alias.clone()));
-            aliases
-        });
-    }
-    for extra_symbol in &syntax_grammar.extra_symbols {
-        if !default_aliases.contains_key(extra_symbol) {
-            aliases_by_symbol
-                .entry(*extra_symbol)
-                .or_insert_with(BTreeSet::new)
-                .insert(None);
-        }
-    }
-    for variable in &syntax_grammar.variables {
-        for production in &variable.productions {
-            for step in &production.steps {
-                aliases_by_symbol
-                    .entry(step.symbol)
-                    .or_insert_with(BTreeSet::new)
-                    .insert(
-                        step.alias
-                            .as_ref()
-                            .or_else(|| default_aliases.get(&step.symbol))
-                            .cloned(),
-                    );
-            }
-        }
-    }
-    aliases_by_symbol.insert(
-        Symbol::non_terminal(0),
-        std::iter::once(&None).cloned().collect(),
-    );
-    aliases_by_symbol
-}
-
-pub fn get_supertype_symbol_map(
-    syntax_grammar: &SyntaxGrammar,
-    default_aliases: &AliasMap,
-    variable_info: &[VariableInfo],
-) -> BTreeMap<Symbol, Vec<ChildType>> {
-    let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases);
-    let mut supertype_symbol_map = BTreeMap::new();
-
-    let mut symbols_by_alias = HashMap::new();
-    for (symbol, aliases) in &aliases_by_symbol {
-        for alias in aliases.iter().flatten() {
-            symbols_by_alias
-                .entry(alias)
-                .or_insert_with(Vec::new)
-                .push(*symbol);
-        }
-    }
-
-    for (i, info) in variable_info.iter().enumerate() {
-        let symbol = Symbol::non_terminal(i);
-        if syntax_grammar.supertype_symbols.contains(&symbol) {
-            let subtypes = info.children.types.clone();
-            supertype_symbol_map.insert(symbol, subtypes);
-        }
-    }
-    supertype_symbol_map
-}
-
-#[cfg(feature = "load")]
-pub type SuperTypeCycleResult<T> = Result<T, SuperTypeCycleError>;
-
-#[derive(Debug, Error, Serialize)]
-pub struct SuperTypeCycleError {
-    items: Vec<String>,
-}
-
-impl std::fmt::Display for SuperTypeCycleError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Dependency cycle detected in node types:")?;
-        for (i, item) in self.items.iter().enumerate() {
-            write!(f, " {item}")?;
-            if i < self.items.len() - 1 {
-                write!(f, ",")?;
-            }
-        }
-
-        Ok(())
-    }
-}
-
-#[cfg(feature = "load")]
 pub fn generate_node_types_json(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    default_aliases: &AliasMap,
    variable_info: &[VariableInfo],
-) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
+) -> Vec<NodeInfoJSON> {
    let mut node_types_json = BTreeMap::new();

    let child_type_to_node_type = |child_type: &ChildType| match child_type {
@ -528,32 +422,41 @@ pub fn generate_node_types_json(
        }
    };

-    let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases);
-
-    let empty = BTreeSet::new();
-    let extra_names = syntax_grammar
-        .extra_symbols
-        .iter()
-        .flat_map(|symbol| {
+    let mut aliases_by_symbol = HashMap::new();
+    for (symbol, alias) in default_aliases {
+        aliases_by_symbol.insert(*symbol, {
+            let mut aliases = HashSet::new();
+            aliases.insert(Some(alias.clone()));
+            aliases
+        });
+    }
+    for extra_symbol in &syntax_grammar.extra_symbols {
+        if !default_aliases.contains_key(extra_symbol) {
            aliases_by_symbol
-                .get(symbol)
-                .unwrap_or(&empty)
-                .iter()
-                .map(|alias| {
-                    alias.as_ref().map_or(
-                        match symbol.kind {
-                            SymbolType::NonTerminal => &syntax_grammar.variables[symbol.index].name,
-                            SymbolType::Terminal => &lexical_grammar.variables[symbol.index].name,
-                            SymbolType::External => {
-                                &syntax_grammar.external_tokens[symbol.index].name
-                            }
-                            _ => unreachable!(),
-                        },
-                        |alias| &alias.value,
-                    )
-                })
-        })
-        .collect::<HashSet<_>>();
+                .entry(*extra_symbol)
+                .or_insert_with(HashSet::new)
+                .insert(None);
+        }
+    }
+    for variable in &syntax_grammar.variables {
+        for production in &variable.productions {
+            for step in &production.steps {
+                aliases_by_symbol
+                    .entry(step.symbol)
+                    .or_insert_with(HashSet::new)
+                    .insert(
+                        step.alias
+                            .as_ref()
+                            .or_else(|| default_aliases.get(&step.symbol))
+                            .cloned(),
+                    );
+            }
+        }
+    }
+    aliases_by_symbol.insert(
+        Symbol::non_terminal(0),
+        std::iter::once(&None).cloned().collect(),
+    );

    let mut subtype_map = Vec::new();
    for (i, info) in variable_info.iter().enumerate() {
@ -566,8 +469,6 @@ pub fn generate_node_types_json(
                    .or_insert_with(|| NodeInfoJSON {
                        kind: variable.name.clone(),
                        named: true,
-                        root: false,
-                        extra: extra_names.contains(&variable.name),
                        fields: None,
                        children: None,
                        subtypes: None,
@ -589,7 +490,10 @@ pub fn generate_node_types_json(
        } else if !syntax_grammar.variables_to_inline.contains(&symbol) {
            // If a rule is aliased under multiple names, then its information
            // contributes to multiple entries in the final JSON.
-            for alias in aliases_by_symbol.get(&symbol).unwrap_or(&BTreeSet::new()) {
+            for alias in aliases_by_symbol
+                .get(&Symbol::non_terminal(i))
+                .unwrap_or(&HashSet::new())
+            {
                let kind;
                let is_named;
                if let Some(alias) = alias {
@ -610,8 +514,6 @@ pub fn generate_node_types_json(
                    NodeInfoJSON {
                        kind: kind.clone(),
                        named: is_named,
-                        root: i == 0,
-                        extra: extra_names.contains(&kind),
                        fields: Some(BTreeMap::new()),
                        children: None,
                        subtypes: None,
@ -621,8 +523,8 @@ pub fn generate_node_types_json(
                let fields_json = node_type_json.fields.as_mut().unwrap();
                for (new_field, field_info) in &info.fields {
                    let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| {
-                        // If another rule is aliased with the same name, and does *not* have this
-                        // field, then this field cannot be required.
+                        // If another rule is aliased with the same name, and does *not* have this field,
+                        // then this field cannot be required.
                        let mut field_json = FieldInfoJSON::default();
                        if node_type_existed {
                            field_json.required = false;
@ -632,8 +534,8 @@ pub fn generate_node_types_json(
                    populate_field_info_json(field_json, field_info);
                }

-                // If another rule is aliased with the same name, any fields that aren't present in
-                // this cannot be required.
+                // If another rule is aliased with the same name, any fields that aren't present in this
+                // cannot be required.
                for (existing_field, field_json) in fields_json.iter_mut() {
                    if !info.fields.contains_key(existing_field) {
                        field_json.required = false;
@ -650,40 +552,22 @@ pub fn generate_node_types_json(
        }
    }

-    // Sort the subtype map topologically so that subtypes are listed before their supertypes.
-    let mut sorted_kinds = Vec::with_capacity(subtype_map.len());
-    let mut top_sort = topological_sort::TopologicalSort::<String>::new();
-    for (supertype, subtypes) in &subtype_map {
-        for subtype in subtypes {
-            top_sort.add_dependency(subtype.kind.clone(), supertype.kind.clone());
-        }
-    }
-    loop {
-        let mut next_kinds = top_sort.pop_all();
-        match (next_kinds.is_empty(), top_sort.is_empty()) {
-            (true, true) => break,
-            (true, false) => {
-                let mut items = top_sort.collect::<Vec<String>>();
-                items.sort();
-                return Err(SuperTypeCycleError { items });
-            }
-            (false, _) => {
-                next_kinds.sort();
-                sorted_kinds.extend(next_kinds);
-            }
-        }
-    }
+    // Sort the subtype map so that subtypes are listed before their supertypes.
    subtype_map.sort_by(|a, b| {
-        let a_idx = sorted_kinds.iter().position(|n| n.eq(&a.0.kind)).unwrap();
-        let b_idx = sorted_kinds.iter().position(|n| n.eq(&b.0.kind)).unwrap();
-        a_idx.cmp(&b_idx)
+        if b.1.contains(&a.0) {
+            Ordering::Less
+        } else if a.1.contains(&b.0) {
+            Ordering::Greater
+        } else {
+            Ordering::Equal
+        }
    });

    for node_type_json in node_types_json.values_mut() {
        if node_type_json
            .children
            .as_ref()
-            .is_some_and(|c| c.types.is_empty())
+            .map_or(false, |c| c.types.is_empty())
        {
            node_type_json.children = None;
        }
@ -700,6 +584,7 @@ pub fn generate_node_types_json(

    let mut anonymous_node_types = Vec::new();

+    let empty = HashSet::new();
    let regular_tokens = lexical_grammar
        .variables
        .iter()
@ -737,18 +622,13 @@ pub fn generate_node_types_json(
    for (name, kind) in regular_tokens.chain(external_tokens) {
        match kind {
            VariableType::Named => {
-                let node_type_json =
-                    node_types_json
-                        .entry(name.clone())
-                        .or_insert_with(|| NodeInfoJSON {
-                            kind: name.clone(),
-                            named: true,
-                            root: false,
-                            extra: extra_names.contains(&name),
-                            fields: None,
-                            children: None,
-                            subtypes: None,
-                        });
+                let node_type_json = node_types_json.entry(name.clone()).or_insert(NodeInfoJSON {
+                    kind: name.clone(),
+                    named: true,
+                    fields: None,
+                    children: None,
+                    subtypes: None,
+                });
                if let Some(children) = &mut node_type_json.children {
                    children.required = false;
                }
@ -761,8 +641,6 @@ pub fn generate_node_types_json(
            VariableType::Anonymous => anonymous_node_types.push(NodeInfoJSON {
                kind: name.clone(),
                named: false,
-                root: false,
-                extra: extra_names.contains(&name),
                fields: None,
                children: None,
                subtypes: None,
@ -783,15 +661,11 @@ pub fn generate_node_types_json(
                a_is_leaf.cmp(&b_is_leaf)
            })
            .then_with(|| a.kind.cmp(&b.kind))
-            .then_with(|| a.named.cmp(&b.named))
-            .then_with(|| a.root.cmp(&b.root))
-            .then_with(|| a.extra.cmp(&b.extra))
    });
    result.dedup();
-    Ok(result)
+    result
 }

-#[cfg(feature = "load")]
 fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec<NodeTypeJSON>)]) {
    for (supertype, subtypes) in subtype_map {
        if info.types.contains(supertype) {
@ -828,26 +702,24 @@ fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T
 where
    T: 'a + Clone + Eq + Ord,
 {
-    values.into_iter().fold(false, |acc, value| {
+    values.into_iter().any(|value| {
        if let Err(i) = vec.binary_search(value) {
            vec.insert(i, value.clone());
            true
        } else {
-            acc
+            false
        }
    })
 }

-#[cfg(all(test, feature = "load"))]
+#[cfg(test)]
 mod tests {
    use super::*;
-    use crate::{
-        grammars::{
-            InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
-        },
-        prepare_grammar::prepare_grammar,
-        rules::Rule,
+    use crate::generate::grammars::{
+        InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
    };
+    use crate::generate::prepare_grammar::prepare_grammar;
+    use crate::generate::rules::Rule;

    #[test]
    fn test_node_types_simple() {
@ -875,8 +747,7 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(node_types.len(), 3);

@ -885,8 +756,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
-                root: true,
-                extra: false,
                subtypes: None,
                children: None,
                fields: Some(
@ -924,8 +793,6 @@ mod tests {
            NodeInfoJSON {
                kind: ";".to_string(),
                named: false,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None
@ -936,8 +803,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v2".to_string(),
                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None
@ -965,9 +830,7 @@ mod tests {
                },
                // This rule is not reachable from the start symbol, but
                // it is reachable from the 'extra_symbols' so it
-                // should be present in the node_types.
-                // But because it's only a literal, it will get replaced by
-                // a lexical variable.
+                // should be present in the node_types
                Variable {
                    name: "v3".to_string(),
                    kind: VariableType::Named,
@ -975,8 +838,7 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(node_types.len(), 4);

@ -985,8 +847,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
-                root: true,
-                extra: false,
                subtypes: None,
                children: None,
                fields: Some(
@ -1024,8 +884,6 @@ mod tests {
            NodeInfoJSON {
                kind: ";".to_string(),
                named: false,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None
@ -1036,8 +894,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v2".to_string(),
                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None
@ -1048,120 +904,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v3".to_string(),
                named: true,
-                root: false,
-                extra: true,
-                subtypes: None,
-                children: None,
-                fields: None
-            }
-        );
-    }
-
-    #[test]
-    fn test_node_types_deeper_extras() {
-        let node_types = get_node_types(&InputGrammar {
-            extra_symbols: vec![Rule::named("v3")],
-            variables: vec![
-                Variable {
-                    name: "v1".to_string(),
-                    kind: VariableType::Named,
-                    rule: Rule::seq(vec![
-                        Rule::field("f1".to_string(), Rule::named("v2")),
-                        Rule::field("f2".to_string(), Rule::string(";")),
-                    ]),
-                },
-                Variable {
-                    name: "v2".to_string(),
-                    kind: VariableType::Named,
-                    rule: Rule::string("x"),
-                },
-                // This rule is not reachable from the start symbol, but
-                // it is reachable from the 'extra_symbols' so it
-                // should be present in the node_types.
-                // Because it is not just a literal, it won't get replaced
-                // by a lexical variable.
-                Variable {
-                    name: "v3".to_string(),
-                    kind: VariableType::Named,
-                    rule: Rule::seq(vec![Rule::string("y"), Rule::repeat(Rule::string("z"))]),
-                },
-            ],
-            ..Default::default()
-        })
-        .unwrap();
-
-        assert_eq!(node_types.len(), 6);
-
-        assert_eq!(
-            node_types[0],
-            NodeInfoJSON {
-                kind: "v1".to_string(),
-                named: true,
-                root: true,
-                extra: false,
-                subtypes: None,
-                children: None,
-                fields: Some(
-                    vec![
-                        (
-                            "f1".to_string(),
-                            FieldInfoJSON {
-                                multiple: false,
-                                required: true,
-                                types: vec![NodeTypeJSON {
-                                    kind: "v2".to_string(),
-                                    named: true,
-                                }]
-                            }
-                        ),
-                        (
-                            "f2".to_string(),
-                            FieldInfoJSON {
-                                multiple: false,
-                                required: true,
-                                types: vec![NodeTypeJSON {
-                                    kind: ";".to_string(),
-                                    named: false,
-                                }]
-                            }
-                        ),
-                    ]
-                    .into_iter()
-                    .collect()
-                )
-            }
-        );
-        assert_eq!(
-            node_types[1],
-            NodeInfoJSON {
-                kind: "v3".to_string(),
-                named: true,
-                root: false,
-                extra: true,
-                subtypes: None,
-                children: None,
-                fields: Some(BTreeMap::default())
-            }
-        );
-        assert_eq!(
-            node_types[2],
-            NodeInfoJSON {
-                kind: ";".to_string(),
-                named: false,
-                root: false,
-                extra: false,
-                subtypes: None,
-                children: None,
-                fields: None
-            }
-        );
-        assert_eq!(
-            node_types[3],
-            NodeInfoJSON {
-                kind: "v2".to_string(),
-                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None
@ -1200,16 +942,13 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "_v2".to_string(),
                named: true,
-                root: false,
-                extra: false,
                fields: None,
                children: None,
                subtypes: Some(vec![
@ -1233,8 +972,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
-                root: true,
-                extra: false,
                subtypes: None,
                children: None,
                fields: Some(
@ -1290,16 +1027,13 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
-                root: true,
-                extra: false,
                subtypes: None,
                children: Some(FieldInfoJSON {
                    multiple: true,
@ -1337,8 +1071,6 @@ mod tests {
            NodeInfoJSON {
                kind: "v2".to_string(),
                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: Some(FieldInfoJSON {
                    multiple: false,
@ -1376,16 +1108,13 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
-                root: true,
-                extra: false,
                subtypes: None,
                children: Some(FieldInfoJSON {
                    multiple: true,
@ -1451,8 +1180,7 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None);
        assert_eq!(
@ -1460,8 +1188,6 @@ mod tests {
            Some(&NodeInfoJSON {
                kind: "identifier".to_string(),
                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None,
@ -1472,8 +1198,6 @@ mod tests {
            Some(&NodeInfoJSON {
                kind: "type_identifier".to_string(),
                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: None,
                fields: None,
@ -1508,16 +1232,13 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "a".to_string(),
                named: true,
-                root: true,
-                extra: false,
                subtypes: None,
                children: Some(FieldInfoJSON {
                    multiple: true,
@ -1558,16 +1279,13 @@ mod tests {
                ]),
            }],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            node_types,
            [NodeInfoJSON {
                kind: "script".to_string(),
                named: true,
-                root: true,
-                extra: false,
                fields: Some(BTreeMap::new()),
                children: None,
                subtypes: None
@ -1607,8 +1325,7 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            &node_types
@ -1625,8 +1342,6 @@ mod tests {
                NodeInfoJSON {
                    kind: "a".to_string(),
                    named: true,
-                    root: false,
-                    extra: false,
                    subtypes: None,
                    children: None,
                    fields: Some(
@ -1682,8 +1397,6 @@ mod tests {
                NodeInfoJSON {
                    kind: "script".to_string(),
                    named: true,
-                    root: true,
-                    extra: false,
                    subtypes: None,
                    // Only one node
                    children: Some(FieldInfoJSON {
@ -1727,8 +1440,7 @@ mod tests {
                },
            ],
            ..Default::default()
-        })
-        .unwrap();
+        });

        assert_eq!(
            node_types.iter().map(|n| &n.kind).collect::<Vec<_>>(),
@ -1739,8 +1451,6 @@ mod tests {
            NodeInfoJSON {
                kind: "b".to_string(),
                named: true,
-                root: false,
-                extra: false,
                subtypes: None,
                children: Some(FieldInfoJSON {
                    multiple: true,
@ -2055,7 +1765,7 @@ mod tests {
        );
    }

-    fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
+    fn get_node_types(grammar: &InputGrammar) -> Vec<NodeInfoJSON> {
        let (syntax_grammar, lexical_grammar, _, default_aliases) =
            prepare_grammar(grammar).unwrap();
        let variable_info =
--- a/cli/src/generate/parse_grammar.rs
+++ b/cli/src/generate/parse_grammar.rs
@ -0,0 +1,238 @@
+use super::grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType};
+use super::rules::{Precedence, Rule};
+use anyhow::{anyhow, Result};
+use serde::Deserialize;
+use serde_json::{Map, Value};
+
+/// One grammar rule as it appears in the grammar JSON.
+///
+/// The enum is internally tagged: the JSON object's `"type"` field selects the
+/// variant, which is why the variant names are spelled exactly like the tag
+/// strings (e.g. `"REPEAT1"`, `"PREC_LEFT"`) instead of in camel case. The
+/// remaining keys of the object populate the variant's fields. `parse_rule`
+/// converts each variant into the corresponding `Rule`.
+#[derive(Deserialize)]
+#[serde(tag = "type")]
+#[allow(non_camel_case_types)]
+#[allow(clippy::upper_case_acronyms)]
+enum RuleJSON {
+    // `content` exposed under the name `value`; `named` is forwarded to `Rule::alias`.
+    ALIAS {
+        content: Box<RuleJSON>,
+        named: bool,
+        value: String,
+    },
+    BLANK,
+    STRING {
+        value: String,
+    },
+    // `flags` is optional in the JSON; see `parse_rule` for which flags are kept.
+    PATTERN {
+        value: String,
+        flags: Option<String>,
+    },
+    // Reference to another rule by name.
+    SYMBOL {
+        name: String,
+    },
+    CHOICE {
+        members: Vec<RuleJSON>,
+    },
+    FIELD {
+        name: String,
+        content: Box<RuleJSON>,
+    },
+    SEQ {
+        members: Vec<RuleJSON>,
+    },
+    REPEAT {
+        content: Box<RuleJSON>,
+    },
+    REPEAT1 {
+        content: Box<RuleJSON>,
+    },
+    // Dynamic precedence only accepts an integer, unlike the other PREC_* variants.
+    PREC_DYNAMIC {
+        value: i32,
+        content: Box<RuleJSON>,
+    },
+    PREC_LEFT {
+        value: PrecedenceValueJSON,
+        content: Box<RuleJSON>,
+    },
+    PREC_RIGHT {
+        value: PrecedenceValueJSON,
+        content: Box<RuleJSON>,
+    },
+    PREC {
+        value: PrecedenceValueJSON,
+        content: Box<RuleJSON>,
+    },
+    TOKEN {
+        content: Box<RuleJSON>,
+    },
+    IMMEDIATE_TOKEN {
+        content: Box<RuleJSON>,
+    },
+}
+
+/// Precedence value of a `PREC`, `PREC_LEFT` or `PREC_RIGHT` rule.
+///
+/// Untagged: serde tries the variants in declaration order, so a JSON number
+/// becomes `Integer` and a JSON string becomes `Name`.
+#[derive(Deserialize)]
+#[serde(untagged)]
+enum PrecedenceValueJSON {
+    Integer(i32),
+    Name(String),
+}
+
+/// Top-level shape of the grammar JSON consumed by `parse_grammar`.
+///
+/// Only `name` and `rules` are mandatory. Fields marked `#[serde(default)]`
+/// fall back to an empty list when the key is absent, and `word` is optional.
+#[derive(Deserialize)]
+pub(crate) struct GrammarJSON {
+    pub(crate) name: String,
+    // Kept as raw JSON values; each one is deserialized into a `RuleJSON`
+    // individually by `parse_grammar`.
+    rules: Map<String, Value>,
+    // Each inner list is one ordering; entries must be STRING or SYMBOL rules.
+    #[serde(default)]
+    precedences: Vec<Vec<RuleJSON>>,
+    // Becomes `InputGrammar::expected_conflicts`.
+    #[serde(default)]
+    conflicts: Vec<Vec<String>>,
+    #[serde(default)]
+    externals: Vec<RuleJSON>,
+    #[serde(default)]
+    extras: Vec<RuleJSON>,
+    // Becomes `InputGrammar::variables_to_inline`.
+    #[serde(default)]
+    inline: Vec<String>,
+    #[serde(default)]
+    supertypes: Vec<String>,
+    // Becomes `InputGrammar::word_token`.
+    word: Option<String>,
+}
+
+/// Parses the JSON text of a grammar into an [`InputGrammar`].
+///
+/// Every entry of `rules` becomes a [`Variable`] of kind
+/// [`VariableType::Named`]. Each entry of `precedences` must be a `STRING` or
+/// `SYMBOL` rule and is turned into a [`PrecedenceEntry`]. `extras` and
+/// `externals` are converted with `parse_rule`; the remaining fields are
+/// copied over unchanged.
+///
+/// # Errors
+///
+/// Returns an error if `input` does not deserialize into a `GrammarJSON`, if a
+/// rule body does not deserialize into a `RuleJSON`, or if a `precedences`
+/// list contains anything other than a string or a symbol.
+pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
+    let parsed: GrammarJSON = serde_json::from_str(input)?;
+
+    // Rule bodies are deserialized lazily, one at a time, so the first
+    // malformed rule aborts the whole conversion.
+    let variables = parsed
+        .rules
+        .into_iter()
+        .map(|(name, body)| -> Result<Variable> {
+            Ok(Variable {
+                name,
+                kind: VariableType::Named,
+                rule: parse_rule(serde_json::from_value(body)?),
+            })
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    let precedence_orderings = parsed
+        .precedences
+        .into_iter()
+        .map(|ordering| {
+            ordering
+                .into_iter()
+                .map(|entry| match entry {
+                    RuleJSON::STRING { value } => Ok(PrecedenceEntry::Name(value)),
+                    RuleJSON::SYMBOL { name } => Ok(PrecedenceEntry::Symbol(name)),
+                    _ => Err(anyhow!(
+                        "Invalid rule in precedences array. Only strings and symbols are allowed"
+                    )),
+                })
+                .collect::<Result<Vec<_>>>()
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    Ok(InputGrammar {
+        name: parsed.name,
+        word_token: parsed.word,
+        expected_conflicts: parsed.conflicts,
+        supertype_symbols: parsed.supertypes,
+        variables_to_inline: parsed.inline,
+        precedence_orderings,
+        variables,
+        extra_symbols: parsed.extras.into_iter().map(parse_rule).collect(),
+        external_tokens: parsed.externals.into_iter().map(parse_rule).collect(),
+    })
+}
+
+/// Converts a deserialized [`RuleJSON`] tree into the generator's [`Rule`].
+///
+/// The conversion is recursive and infallible. Points worth knowing:
+///
+/// * `REPEAT1` maps directly onto `Rule::repeat`, while `REPEAT` is the same
+///   repetition wrapped in a choice with `Rule::Blank`.
+/// * For `PATTERN`, only the `i` flag is kept. The `u` flag is dropped
+///   silently; any other flag is dropped after printing a warning to stderr.
+///   A missing `flags` key yields an empty flag string.
+fn parse_rule(json: RuleJSON) -> Rule {
+    match json {
+        RuleJSON::ALIAS {
+            content,
+            value,
+            named,
+        } => Rule::alias(parse_rule(*content), value, named),
+        RuleJSON::BLANK => Rule::Blank,
+        RuleJSON::STRING { value } => Rule::String(value),
+        RuleJSON::PATTERN { value, flags } => Rule::Pattern(
+            value,
+            flags.map_or(String::new(), |f| {
+                f.chars()
+                    .filter(|c| match *c {
+                        // Case-insensitivity is the only flag passed through.
+                        'i' => true,
+                        // Silently ignore the unicode flag. Previously this
+                        // case fell through to the warning branch below.
+                        'u' => false,
+                        _ => {
+                            eprintln!("Warning: unsupported flag {c}");
+                            false
+                        }
+                    })
+                    .collect()
+            }),
+        ),
+        RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
+        RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
+        RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
+        RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
+        RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
+        RuleJSON::REPEAT { content } => {
+            Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
+        }
+        RuleJSON::PREC { value, content } => Rule::prec(value.into(), parse_rule(*content)),
+        RuleJSON::PREC_LEFT { value, content } => {
+            Rule::prec_left(value.into(), parse_rule(*content))
+        }
+        RuleJSON::PREC_RIGHT { value, content } => {
+            Rule::prec_right(value.into(), parse_rule(*content))
+        }
+        RuleJSON::PREC_DYNAMIC { value, content } => {
+            Rule::prec_dynamic(value, parse_rule(*content))
+        }
+        RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
+        RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
+    }
+}
+
+impl From<PrecedenceValueJSON> for Precedence {
+    /// Maps each JSON precedence variant onto the `Precedence` variant of the
+    /// same name, moving the payload across unchanged.
+    fn from(val: PrecedenceValueJSON) -> Self {
+        use PrecedenceValueJSON as Json;
+
+        match val {
+            Json::Name(label) => Self::Name(label),
+            Json::Integer(level) => Self::Integer(level),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A minimal two-rule grammar: checks the grammar name and that both
+    /// rules come back as named variables with the expected rule trees.
+    #[test]
+    fn test_parse_grammar() {
+        let source = r#"{
+            "name": "my_lang",
+            "rules": {
+                "file": {
+                    "type": "REPEAT1",
+                    "content": {
+                        "type": "SYMBOL",
+                        "name": "statement"
+                    }
+                },
+                "statement": {
+                    "type": "STRING",
+                    "value": "foo"
+                }
+            }
+        }"#;
+
+        let file_rule = Variable {
+            name: "file".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())),
+        };
+        let statement_rule = Variable {
+            name: "statement".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::String("foo".to_string()),
+        };
+
+        let grammar = parse_grammar(source).unwrap();
+
+        assert_eq!(grammar.name, "my_lang");
+        assert_eq!(grammar.variables, vec![file_rule, statement_rule]);
+    }
+}
--- a/crates/generate/src/prepare_grammar/expand_repeats.rs
+++ b/crates/generate/src/prepare_grammar/expand_repeats.rs
@ -1,10 +1,8 @@
-use std::{collections::HashMap, mem};
-
 use super::ExtractedSyntaxGrammar;
-use crate::{
-    grammars::{Variable, VariableType},
-    rules::{Rule, Symbol},
-};
+use crate::generate::grammars::{Variable, VariableType};
+use crate::generate::rules::{Rule, Symbol};
+use std::collections::HashMap;
+use std::mem;

 struct Expander {
    variable_name: String,
@ -59,7 +57,7 @@ impl Expander {
                params: params.clone(),
            },

-            // For repetitions, introduce an auxiliary rule that contains the
+            // For repetitions, introduce an auxiliary rule that contains the
            // repeated content, but can also contain a recursive binary tree structure.
            Rule::Repeat(content) => {
                let inner_rule = self.expand_rule(content);
--- a/crates/generate/src/prepare_grammar/expand_tokens.rs
+++ b/crates/generate/src/prepare_grammar/expand_tokens.rs
@ -1,16 +1,35 @@
-use regex_syntax::{
-    hir::{Class, Hir, HirKind},
-    ParserBuilder,
-};
-use serde::Serialize;
-use thiserror::Error;
-
 use super::ExtractedLexicalGrammar;
-use crate::{
-    grammars::{LexicalGrammar, LexicalVariable},
-    nfa::{CharacterSet, Nfa, NfaState},
-    rules::{Precedence, Rule},
+use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
+use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
+use crate::generate::rules::{Precedence, Rule};
+use anyhow::{anyhow, Context, Result};
+use lazy_static::lazy_static;
+use regex::Regex;
+use regex_syntax::ast::{
+    parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind,
+    RepetitionKind, RepetitionRange,
 };
+use std::collections::HashMap;
+use std::i32;
+
+// Lazily-initialized tables shared by the regex expansion code. Each value is
+// built on first access; the `unwrap` calls panic at that point if the regex
+// or one of the embedded JSON documents is malformed.
+lazy_static! {
+    // Matches a `{...}` group that sits at the start of the pattern or after a
+    // character other than `\`, `p` or `P`, and whose contents include at
+    // least one character that is not a hex digit, `,` or `}`. Used by
+    // `preprocess_regex` to backslash-escape such braces.
+    static ref CURLY_BRACE_REGEX: Regex =
+        Regex::new(r"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}").unwrap();
+    // Name -> list of u32 values, parsed from the embedded JSON.
+    // NOTE(review): presumably code points; usage is outside this view.
+    static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
+        serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
+    static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
+        serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
+    // Name -> name mappings, parsed from the embedded alias JSON files.
+    static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
+        serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
+    static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
+        serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
+}
+
+// JSON documents embedded into the binary at compile time; paths are relative
+// to this source file.
+const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
+const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
+const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
+const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
+// Characters for which `preprocess_regex` drops a preceding backslash.
+const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];

 struct NfaBuilder {
    nfa: Nfa,
@ -18,40 +37,6 @@ struct NfaBuilder {
    precedence_stack: Vec<i32>,
 }

-pub type ExpandTokensResult<T> = Result<T, ExpandTokensError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExpandTokensError {
-    #[error(
-        "The rule `{0}` matches the empty string.
-Tree-sitter does not support syntactic rules that match the empty string
-unless they are used only as the grammar's start rule.
-"
-    )]
-    EmptyString(String),
-    #[error(transparent)]
-    Processing(ExpandTokensProcessingError),
-    #[error(transparent)]
-    ExpandRule(ExpandRuleError),
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct ExpandTokensProcessingError {
-    rule: String,
-    error: ExpandRuleError,
-}
-
-impl std::fmt::Display for ExpandTokensProcessingError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        writeln!(
-            f,
-            "Error processing rule {}: Grammar error: Unexpected rule {:?}",
-            self.rule, self.error
-        )?;
-        Ok(())
-    }
-}
-
 fn get_implicit_precedence(rule: &Rule) -> i32 {
    match rule {
        Rule::String(_) => 2,
@ -75,7 +60,30 @@ const fn get_completion_precedence(rule: &Rule) -> i32 {
    0
 }

-pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult<LexicalGrammar> {
+/// Normalizes a pattern string before it is handed to the regex parser.
+///
+/// Two rewrites are applied, in this order:
+///
+/// 1. The first match of `CURLY_BRACE_REGEX` has its braces backslash-escaped.
+///    Only the first match is rewritten, because `Regex::replace` is used
+///    rather than `replace_all`.
+/// 2. A backslash in front of one of `ALLOWED_REDUNDANT_ESCAPED_CHARS` is
+///    dropped. Every other escape sequence is copied through unchanged, and a
+///    lone backslash at the very end of the pattern is kept.
+fn preprocess_regex(content: &str) -> String {
+    let escaped_braces = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}");
+    let mut output = String::with_capacity(escaped_braces.len());
+    let mut remaining = escaped_braces.chars();
+    while let Some(current) = remaining.next() {
+        if current != '\\' {
+            output.push(current);
+            continue;
+        }
+        // `current` is a backslash: decide based on the character it escapes.
+        match remaining.next() {
+            Some(following) => {
+                if !ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&following) {
+                    output.push('\\');
+                }
+                output.push(following);
+            }
+            // Trailing backslash with nothing after it.
+            None => output.push('\\'),
+        }
+    }
+    output
+}
+
+pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
    let mut builder = NfaBuilder {
        nfa: Nfa::new(),
        is_sep: true,
@ -89,12 +97,8 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult
        Rule::repeat(Rule::choice(grammar.separators))
    };

-    let mut variables = Vec::with_capacity(grammar.variables.len());
+    let mut variables = Vec::new();
    for (i, variable) in grammar.variables.into_iter().enumerate() {
-        if variable.rule.is_empty() {
-            Err(ExpandTokensError::EmptyString(variable.name.clone()))?;
-        }
-
        let is_immediate_token = match &variable.rule {
            Rule::Metadata { params, .. } => params.is_main_token,
            _ => false,
@ -108,19 +112,12 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult
        let last_state_id = builder.nfa.last_state_id();
        builder
            .expand_rule(&variable.rule, last_state_id)
-            .map_err(|e| {
-                ExpandTokensError::Processing(ExpandTokensProcessingError {
-                    rule: variable.name.clone(),
-                    error: e,
-                })
-            })?;
+            .with_context(|| format!("Error processing rule {}", variable.name))?;

        if !is_immediate_token {
            builder.is_sep = true;
            let last_state_id = builder.nfa.last_state_id();
-            builder
-                .expand_rule(&separator_rule, last_state_id)
-                .map_err(ExpandTokensError::ExpandRule)?;
+            builder.expand_rule(&separator_rule, last_state_id)?;
        }

        variables.push(LexicalVariable {
@ -137,64 +134,23 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult
    })
 }

-pub type ExpandRuleResult<T> = Result<T, ExpandRuleError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExpandRuleError {
-    #[error("Grammar error: Unexpected rule {0:?}")]
-    UnexpectedRule(Rule),
-    #[error("{0}")]
-    Parse(String),
-    #[error(transparent)]
-    ExpandRegex(ExpandRegexError),
-}
-
-pub type ExpandRegexResult<T> = Result<T, ExpandRegexError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExpandRegexError {
-    #[error("{0}")]
-    Utf8(String),
-    #[error("Regex error: Assertions are not supported")]
-    Assertion,
-}
-
 impl NfaBuilder {
-    fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> ExpandRuleResult<bool> {
+    fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
        match rule {
            Rule::Pattern(s, f) => {
-                // With unicode enabled, `\w`, `\s` and `\d` expand to character sets that are much
-                // larger than intended, so we replace them with the actual
-                // character sets they should represent. If the full unicode range
-                // of `\w`, `\s` or `\d` are needed then `\p{L}`, `\p{Z}` and `\p{N}` should be
-                // used.
-                let s = s
-                    .replace(r"\w", r"[0-9A-Za-z_]")
-                    .replace(r"\s", r"[\t-\r ]")
-                    .replace(r"\d", r"[0-9]")
-                    .replace(r"\W", r"[^0-9A-Za-z_]")
-                    .replace(r"\S", r"[^\t-\r ]")
-                    .replace(r"\D", r"[^0-9]");
-                let mut parser = ParserBuilder::new()
-                    .case_insensitive(f.contains('i'))
-                    .unicode(true)
-                    .utf8(false)
-                    .build();
-                let hir = parser
-                    .parse(&s)
-                    .map_err(|e| ExpandRuleError::Parse(e.to_string()))?;
-                self.expand_regex(&hir, next_state_id)
-                    .map_err(ExpandRuleError::ExpandRegex)
+                let s = preprocess_regex(s);
+                let ast = parse::Parser::new().parse(&s)?;
+                self.expand_regex(&ast, next_state_id, f.contains('i'))
            }
            Rule::String(s) => {
                for c in s.chars().rev() {
-                    self.push_advance(CharacterSet::from_char(c), next_state_id);
+                    self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
                    next_state_id = self.nfa.last_state_id();
                }
                Ok(!s.is_empty())
            }
            Rule::Choice(elements) => {
-                let mut alternative_state_ids = Vec::with_capacity(elements.len());
+                let mut alternative_state_ids = Vec::new();
                for element in elements {
                    if self.expand_rule(element, next_state_id)? {
                        alternative_state_ids.push(self.nfa.last_state_id());
@ -248,98 +204,129 @@ impl NfaBuilder {
                result
            }
            Rule::Blank => Ok(false),
-            _ => Err(ExpandRuleError::UnexpectedRule(rule.clone()))?,
+            _ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
        }
    }

-    fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> ExpandRegexResult<bool> {
-        match hir.kind() {
-            HirKind::Empty => Ok(false),
-            HirKind::Literal(literal) => {
-                for character in std::str::from_utf8(&literal.0)
-                    .map_err(|e| ExpandRegexError::Utf8(e.to_string()))?
-                    .chars()
-                    .rev()
-                {
-                    let char_set = CharacterSet::from_char(character);
-                    self.push_advance(char_set, next_state_id);
-                    next_state_id = self.nfa.last_state_id();
-                }
+    fn expand_regex(
+        &mut self,
+        ast: &Ast,
+        mut next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
+        const fn inverse_char(c: char) -> char {
+            match c {
+                'a'..='z' => (c as u8 - b'a' + b'A') as char,
+                'A'..='Z' => (c as u8 - b'A' + b'a') as char,
+                c => c,
+            }
+        }

+        fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
+            for char in chars.clone().chars() {
+                let inverted = inverse_char(char);
+                if char != inverted {
+                    chars = chars.add_char(inverted);
+                }
+            }
+            chars
+        }
+
+        match ast {
+            Ast::Empty(_) => Ok(false),
+            Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
+            Ast::Literal(literal) => {
+                let mut char_set = CharacterSet::from_char(literal.c);
+                if case_insensitive {
+                    let inverted = inverse_char(literal.c);
+                    if literal.c != inverted {
+                        char_set = char_set.add_char(inverted);
+                    }
+                }
+                self.push_advance(char_set, next_state_id);
                Ok(true)
            }
-            HirKind::Class(class) => match class {
-                Class::Unicode(class) => {
-                    let mut chars = CharacterSet::default();
-                    for c in class.ranges() {
-                        chars = chars.add_range(c.start(), c.end());
-                    }
-
-                    // For some reason, the long s `ſ` is included if the letter `s` is in a
-                    // pattern, so we remove it.
-                    if chars.range_count() == 3
-                        && chars
-                            .ranges()
-                            // exact check to ensure that `ſ` wasn't intentionally added.
-                            .all(|r| ['s'..='s', 'S'..='S', 'ſ'..='ſ'].contains(&r))
-                    {
-                        chars = chars.difference(CharacterSet::from_char('ſ'));
-                    }
-                    self.push_advance(chars, next_state_id);
-                    Ok(true)
+            Ast::Dot(_) => {
+                self.push_advance(CharacterSet::from_char('\n').negate(), next_state_id);
+                Ok(true)
+            }
+            Ast::Assertion(_) => Err(anyhow!("Regex error: Assertions are not supported")),
+            Ast::ClassUnicode(class) => {
+                let mut chars = self.expand_unicode_character_class(&class.kind)?;
+                if class.negated {
+                    chars = chars.negate();
                }
-                Class::Bytes(bytes_class) => {
-                    let mut chars = CharacterSet::default();
-                    for c in bytes_class.ranges() {
-                        chars = chars.add_range(c.start().into(), c.end().into());
-                    }
-                    self.push_advance(chars, next_state_id);
-                    Ok(true)
+                if case_insensitive {
+                    chars = with_inverse_char(chars);
                }
-            },
-            HirKind::Look(_) => Err(ExpandRegexError::Assertion)?,
-            HirKind::Repetition(repetition) => match (repetition.min, repetition.max) {
-                (0, Some(1)) => self.expand_zero_or_one(&repetition.sub, next_state_id),
-                (1, None) => self.expand_one_or_more(&repetition.sub, next_state_id),
-                (0, None) => self.expand_zero_or_more(&repetition.sub, next_state_id),
-                (min, Some(max)) if min == max => {
-                    self.expand_count(&repetition.sub, min, next_state_id)
+                self.push_advance(chars, next_state_id);
+                Ok(true)
+            }
+            Ast::ClassPerl(class) => {
+                let mut chars = self.expand_perl_character_class(&class.kind);
+                if class.negated {
+                    chars = chars.negate();
                }
-                (min, None) => {
-                    if self.expand_zero_or_more(&repetition.sub, next_state_id)? {
-                        self.expand_count(&repetition.sub, min, next_state_id)
+                if case_insensitive {
+                    chars = with_inverse_char(chars);
+                }
+                self.push_advance(chars, next_state_id);
+                Ok(true)
+            }
+            Ast::ClassBracketed(class) => {
+                let mut chars = self.translate_class_set(&class.kind)?;
+                if class.negated {
+                    chars = chars.negate();
+                }
+                if case_insensitive {
+                    chars = with_inverse_char(chars);
+                }
+                self.push_advance(chars, next_state_id);
+                Ok(true)
+            }
+            Ast::Repetition(repetition) => match repetition.op.kind {
+                RepetitionKind::ZeroOrOne => {
+                    self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
+                }
+                RepetitionKind::OneOrMore => {
+                    self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
+                }
+                RepetitionKind::ZeroOrMore => {
+                    self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
+                }
+                RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
+                    self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
+                }
+                RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
+                    if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
+                        self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
                    } else {
                        Ok(false)
                    }
                }
-                (min, Some(max)) => {
-                    let mut result = self.expand_count(&repetition.sub, min, next_state_id)?;
+                RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
+                    let mut result =
+                        self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
                    for _ in min..max {
                        if result {
                            next_state_id = self.nfa.last_state_id();
                        }
-                        if self.expand_zero_or_one(&repetition.sub, next_state_id)? {
+                        if self.expand_zero_or_one(
+                            &repetition.ast,
+                            next_state_id,
+                            case_insensitive,
+                        )? {
                            result = true;
                        }
                    }
                    Ok(result)
                }
            },
-            HirKind::Capture(capture) => self.expand_regex(&capture.sub, next_state_id),
-            HirKind::Concat(concat) => {
-                let mut result = false;
-                for hir in concat.iter().rev() {
-                    if self.expand_regex(hir, next_state_id)? {
-                        result = true;
-                        next_state_id = self.nfa.last_state_id();
-                    }
-                }
-                Ok(result)
-            }
-            HirKind::Alternation(alternations) => {
-                let mut alternative_state_ids = Vec::with_capacity(alternations.len());
-                for hir in alternations {
-                    if self.expand_regex(hir, next_state_id)? {
+            Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
+            Ast::Alternation(alternation) => {
+                let mut alternative_state_ids = Vec::new();
+                for ast in &alternation.asts {
+                    if self.expand_regex(ast, next_state_id, case_insensitive)? {
                        alternative_state_ids.push(self.nfa.last_state_id());
                    } else {
                        alternative_state_ids.push(next_state_id);
@ -348,21 +335,58 @@ impl NfaBuilder {
                alternative_state_ids.sort_unstable();
                alternative_state_ids.dedup();
                alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
+
                for alternative_state_id in alternative_state_ids {
                    self.push_split(alternative_state_id);
                }
                Ok(true)
            }
+            Ast::Concat(concat) => {
+                let mut result = false;
+                for ast in concat.asts.iter().rev() {
+                    if self.expand_regex(ast, next_state_id, case_insensitive)? {
+                        result = true;
+                        next_state_id = self.nfa.last_state_id();
+                    }
+                }
+                Ok(result)
+            }
        }
    }

-    fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
+    /// Evaluates the body of a bracketed character class.
+    ///
+    /// A plain item is handed to `expand_character_class`. A binary set
+    /// operation has both operands evaluated recursively and then combined
+    /// according to the operator kind.
+    fn translate_class_set(&self, class_set: &ClassSet) -> Result<CharacterSet> {
+        let operation = match class_set {
+            ClassSet::Item(item) => return self.expand_character_class(item),
+            ClassSet::BinaryOp(operation) => operation,
+        };
+
+        // The left operand is translated first, so its error (if any) is the
+        // one that gets reported.
+        let mut left = self.translate_class_set(&operation.lhs)?;
+        let mut right = self.translate_class_set(&operation.rhs)?;
+
+        let combined = match operation.kind {
+            ClassSetBinaryOpKind::Intersection => left.remove_intersection(&mut right),
+            ClassSetBinaryOpKind::Difference => left.difference(right),
+            ClassSetBinaryOpKind::SymmetricDifference => left.symmetric_difference(right),
+        };
+        Ok(combined)
+    }
+
+    fn expand_one_or_more(
+        &mut self,
+        ast: &Ast,
+        next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
        self.nfa.states.push(NfaState::Accept {
            variable_index: 0,
            precedence: 0,
        }); // Placeholder for split
        let split_state_id = self.nfa.last_state_id();
-        if self.expand_regex(hir, split_state_id)? {
+        if self.expand_regex(ast, split_state_id, case_insensitive)? {
            self.nfa.states[split_state_id as usize] =
                NfaState::Split(self.nfa.last_state_id(), next_state_id);
            Ok(true)
@ -372,8 +396,13 @@ impl NfaBuilder {
        }
    }

-    fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
-        if self.expand_regex(hir, next_state_id)? {
+    fn expand_zero_or_one(
+        &mut self,
+        ast: &Ast,
+        next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
+        if self.expand_regex(ast, next_state_id, case_insensitive)? {
            self.push_split(next_state_id);
            Ok(true)
        } else {
@ -381,8 +410,13 @@ impl NfaBuilder {
        }
    }

-    fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
-        if self.expand_one_or_more(hir, next_state_id)? {
+    fn expand_zero_or_more(
+        &mut self,
+        ast: &Ast,
+        next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
+        if self.expand_one_or_more(ast, next_state_id, case_insensitive)? {
            self.push_split(next_state_id);
            Ok(true)
        } else {
@ -392,13 +426,14 @@ impl NfaBuilder {

    fn expand_count(
        &mut self,
-        hir: &Hir,
+        ast: &Ast,
        count: u32,
        mut next_state_id: u32,
-    ) -> ExpandRegexResult<bool> {
+        case_insensitive: bool,
+    ) -> Result<bool> {
        let mut result = false;
        for _ in 0..count {
-            if self.expand_regex(hir, next_state_id)? {
+            if self.expand_regex(ast, next_state_id, case_insensitive)? {
                result = true;
                next_state_id = self.nfa.last_state_id();
            }
@ -406,6 +441,111 @@ impl NfaBuilder {
        Ok(result)
    }

+    /// Converts one item of a bracketed regex class into a `CharacterSet`.
+    ///
+    /// Unions are expanded recursively and merged. Perl, unicode and nested
+    /// bracketed classes honour their own `negated` flag. ASCII classes
+    /// (`ClassSetItem::Ascii`) are rejected with an error.
+    fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
+        match item {
+            ClassSetItem::Empty(_) => Ok(CharacterSet::empty()),
+            ClassSetItem::Literal(literal) => Ok(CharacterSet::from_char(literal.c)),
+            ClassSetItem::Range(range) => Ok(CharacterSet::from_range(range.start.c, range.end.c)),
+            ClassSetItem::Union(union) => {
+                let mut result = CharacterSet::empty();
+                for item in &union.items {
+                    result = result.add(&self.expand_character_class(item)?);
+                }
+                Ok(result)
+            }
+            ClassSetItem::Perl(class) => {
+                let mut set = self.expand_perl_character_class(&class.kind);
+                // `\W`, `\S` and `\D` arrive with `negated` set; without this
+                // check `[\W]` would expand to the same set as `[\w]`.
+                if class.negated {
+                    set = set.negate();
+                }
+                Ok(set)
+            }
+            ClassSetItem::Unicode(class) => {
+                let mut set = self.expand_unicode_character_class(&class.kind)?;
+                if class.negated {
+                    set = set.negate();
+                }
+                Ok(set)
+            }
+            ClassSetItem::Bracketed(class) => {
+                let mut set = self.translate_class_set(&class.kind)?;
+                if class.negated {
+                    set = set.negate();
+                }
+                Ok(set)
+            }
+            ClassSetItem::Ascii(_) => Err(anyhow!(
+                "Regex error: Unsupported character class syntax {item:?}",
+            )),
+        }
+    }
+
+    /// Builds the character set for a unicode class escape such as `\pL` or `\p{Name}`.
+    ///
+    /// * A one-letter class (or a name whose alias resolves to one letter) yields
+    ///   the union of every entry in `UNICODE_CATEGORIES` whose name starts with
+    ///   that letter.
+    /// * Any other name is looked up in `UNICODE_CATEGORIES`, then in
+    ///   `UNICODE_PROPERTIES`; an unknown name is an error.
+    /// * Key-value properties are always an error.
+    ///
+    /// Code points that are not valid `char` values are skipped. Negation is
+    /// not applied here: only the class kind is received.
+    fn expand_unicode_character_class(&self, class: &ClassUnicodeKind) -> Result<CharacterSet> {
+        // Reduce the request to a one-letter category prefix. Multi-letter
+        // names are fully handled, and returned, from inside the match.
+        let prefix = match class {
+            ClassUnicodeKind::NamedValue { .. } => {
+                return Err(anyhow!(
+                    "Regex error: Key-value unicode properties are not supported"
+                ));
+            }
+            ClassUnicodeKind::OneLetter(letter) => letter.to_string(),
+            ClassUnicodeKind::Named(class_name) => {
+                // Category aliases take priority over property aliases; a name
+                // with no alias is used as written.
+                let resolved_name = UNICODE_CATEGORY_ALIASES
+                    .get(class_name.as_str())
+                    .or_else(|| UNICODE_PROPERTY_ALIASES.get(class_name.as_str()))
+                    .unwrap_or(class_name);
+
+                if resolved_name.len() != 1 {
+                    let code_points = UNICODE_CATEGORIES
+                        .get(resolved_name.as_str())
+                        .or_else(|| UNICODE_PROPERTIES.get(resolved_name.as_str()))
+                        .ok_or_else(|| {
+                            anyhow!(
+                                "Regex error: Unsupported unicode character class {class_name}",
+                            )
+                        })?;
+
+                    let mut named_set = CharacterSet::empty();
+                    for code_point in code_points {
+                        if let Some(ch) = std::char::from_u32(*code_point) {
+                            named_set = named_set.add_char(ch);
+                        }
+                    }
+                    return Ok(named_set);
+                }
+
+                resolved_name.clone()
+            }
+        };
+
+        // Merge every category whose name begins with the one-letter prefix.
+        let mut category_set = CharacterSet::empty();
+        for (category, code_points) in UNICODE_CATEGORIES.iter() {
+            if !category.starts_with(prefix.as_str()) {
+                continue;
+            }
+            for code_point in code_points {
+                if let Some(ch) = std::char::from_u32(*code_point) {
+                    category_set = category_set.add_char(ch);
+                }
+            }
+        }
+        Ok(category_set)
+    }
+
+    /// Maps a Perl shorthand class kind (digit, space, word) to its ASCII-only
+    /// character set. Negation is not applied here: only the kind is received.
+    fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
+        match item {
+            ClassPerlKind::Digit => CharacterSet::from_range('0', '9'),
+            // Space, tab, carriage return, newline, vertical tab, form feed.
+            ClassPerlKind::Space => [' ', '\t', '\r', '\n', '\x0B', '\x0C']
+                .iter()
+                .fold(CharacterSet::empty(), |set, &c| set.add_char(c)),
+            // Underscore plus the ASCII letters and digits.
+            ClassPerlKind::Word => [('A', 'Z'), ('a', 'z'), ('0', '9')]
+                .iter()
+                .fold(CharacterSet::empty().add_char('_'), |set, &(start, end)| {
+                    set.add_range(start, end)
+                }),
+        }
+    }
+
    fn push_advance(&mut self, chars: CharacterSet, state_id: u32) {
        let precedence = *self.precedence_stack.last().unwrap();
        self.nfa.states.push(NfaState::Advance {
@ -427,10 +567,8 @@ impl NfaBuilder {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::{
-        grammars::Variable,
-        nfa::{NfaCursor, NfaTransition},
-    };
+    use crate::generate::grammars::Variable;
+    use crate::generate::nfa::{NfaCursor, NfaTransition};

    fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
        let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
@ -709,9 +847,11 @@ mod tests {
                    ("\u{00df}", Some((3, "\u{00df}"))),
                ],
            },
+            // allowing un-escaped curly braces
            Row {
                rules: vec![
-                    Rule::pattern(r"u\{[0-9a-fA-F]+\}", ""),
+                    // Un-escaped curly braces
+                    Rule::pattern(r"u{[0-9a-fA-F]+}", ""),
                    // Already-escaped curly braces
                    Rule::pattern(r"\{[ab]{3}\}", ""),
                    // Unicode codepoints
--- a/crates/generate/src/prepare_grammar/extract_default_aliases.rs
+++ b/crates/generate/src/prepare_grammar/extract_default_aliases.rs
@ -1,7 +1,5 @@
-use crate::{
-    grammars::{LexicalGrammar, SyntaxGrammar},
-    rules::{Alias, AliasMap, Symbol, SymbolType},
-};
+use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
+use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};

 #[derive(Clone, Default)]
 struct SymbolStatus {
@ -16,8 +14,8 @@ struct SymbolStatus {
 // This has two benefits:
 // * It reduces the overhead of storing production-specific alias info in the parse table.
 // * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
-//   ensures that the children of an `ERROR` node have symbols that are consistent with the way that
-//   they would appear in a valid syntax tree.
+//   ensures that the children of an `ERROR` node have symbols that are consistent with the
+//   way that they would appear in a valid syntax tree.
 pub(super) fn extract_default_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
@ -69,7 +67,9 @@ pub(super) fn extract_default_aliases(
            SymbolType::External => &mut external_status_list[symbol.index],
            SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
            SymbolType::Terminal => &mut terminal_status_list[symbol.index],
-            SymbolType::End | SymbolType::EndOfNonTerminalExtra => panic!("Unexpected end token"),
+            SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
+                panic!("Unexpected end token")
+            }
        };
        status.appears_unaliased = true;
    }
@ -162,10 +162,10 @@ pub(super) fn extract_default_aliases(
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::{
-        grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType},
-        nfa::Nfa,
+    use crate::generate::grammars::{
+        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
+    use crate::generate::nfa::Nfa;

    #[test]
    fn test_extract_simple_aliases() {
--- a/crates/generate/src/prepare_grammar/extract_tokens.rs
+++ b/crates/generate/src/prepare_grammar/extract_tokens.rs
@ -1,82 +1,35 @@
-use std::collections::HashMap;
-
-use serde::Serialize;
-use thiserror::Error;
-
 use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
-use crate::{
-    grammars::{ExternalToken, ReservedWordContext, Variable, VariableType},
-    rules::{MetadataParams, Rule, Symbol, SymbolType},
-};
-
-pub type ExtractTokensResult<T> = Result<T, ExtractTokensError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ExtractTokensError {
-    #[error(
-        "The rule `{0}` contains an empty string.
-
-Tree-sitter does not support syntactic rules that contain an empty string
-unless they are used only as the grammar's start rule.
-"
-    )]
-    EmptyString(String),
-    #[error("Rule '{0}' cannot be used as both an external token and a non-terminal rule")]
-    ExternalTokenNonTerminal(String),
-    #[error("Non-symbol rules cannot be used as external tokens")]
-    NonSymbolExternalToken,
-    #[error(transparent)]
-    WordToken(NonTerminalWordTokenError),
-    #[error("Reserved word '{0}' must be a token")]
-    NonTokenReservedWord(String),
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct NonTerminalWordTokenError {
-    pub symbol_name: String,
-    pub conflicting_symbol_name: Option<String>,
-}
-
-impl std::fmt::Display for NonTerminalWordTokenError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "Non-terminal symbol '{}' cannot be used as the word token",
-            self.symbol_name
-        )?;
-        if let Some(conflicting_name) = &self.conflicting_symbol_name {
-            writeln!(
-                f,
-                ", because its rule is duplicated in '{conflicting_name}'",
-            )
-        } else {
-            writeln!(f)
-        }
-    }
-}
+use crate::generate::grammars::{ExternalToken, Variable, VariableType};
+use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType};
+use anyhow::{anyhow, Result};
+use std::collections::HashMap;
+use std::mem;

 pub(super) fn extract_tokens(
    mut grammar: InternedGrammar,
-) -> ExtractTokensResult<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
+) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
    let mut extractor = TokenExtractor {
        current_variable_name: String::new(),
        current_variable_token_count: 0,
-        is_first_rule: false,
        extracted_variables: Vec::new(),
        extracted_usage_counts: Vec::new(),
    };

-    for (i, variable) in &mut grammar.variables.iter_mut().enumerate() {
-        extractor.extract_tokens_in_variable(i == 0, variable)?;
+    for variable in &mut grammar.variables {
+        extractor.extract_tokens_in_variable(variable);
    }

    for variable in &mut grammar.external_tokens {
-        extractor.extract_tokens_in_variable(false, variable)?;
+        extractor.extract_tokens_in_variable(variable);
    }

    let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
    for variable in extractor.extracted_variables {
-        lexical_variables.push(variable);
+        lexical_variables.push(Variable {
+            name: variable.name,
+            kind: variable.kind,
+            rule: variable.rule,
+        });
    }

    // If a variable's entire rule was extracted as a token and that token didn't
@ -85,7 +38,7 @@ pub(super) fn extract_tokens(
    // that pointed to that variable will need to be updated to point to the
    // variable in the lexical grammar. Symbols that pointed to later variables
    // will need to have their indices decremented.
-    let mut variables = Vec::with_capacity(grammar.variables.len());
+    let mut variables = Vec::new();
    let mut symbol_replacer = SymbolReplacer {
        replacements: HashMap::new(),
    };
@ -97,14 +50,10 @@ pub(super) fn extract_tokens(
        {
            if i > 0 && extractor.extracted_usage_counts[index] == 1 {
                let lexical_variable = &mut lexical_variables[index];
-                if lexical_variable.kind == VariableType::Auxiliary
-                    || variable.kind != VariableType::Hidden
-                {
-                    lexical_variable.kind = variable.kind;
-                    lexical_variable.name = variable.name;
-                    symbol_replacer.replacements.insert(i, index);
-                    continue;
-                }
+                lexical_variable.kind = variable.kind;
+                lexical_variable.name = variable.name;
+                symbol_replacer.replacements.insert(i, index);
+                continue;
            }
        }
        variables.push(variable);
@ -118,10 +67,10 @@ pub(super) fn extract_tokens(
        .expected_conflicts
        .into_iter()
        .map(|conflict| {
-            let mut result = conflict
+            let mut result: Vec<_> = conflict
                .iter()
                .map(|symbol| symbol_replacer.replace_symbol(*symbol))
-                .collect::<Vec<_>>();
+                .collect();
            result.sort_unstable();
            result.dedup();
            result
@ -152,14 +101,15 @@ pub(super) fn extract_tokens(
        }
    }

-    let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
+    let mut external_tokens = Vec::new();
    for external_token in grammar.external_tokens {
        let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
        if let Rule::Symbol(symbol) = rule {
            if symbol.is_non_terminal() {
-                Err(ExtractTokensError::ExternalTokenNonTerminal(
-                    variables[symbol.index].name.clone(),
-                ))?;
+                return Err(anyhow!(
+                    "Rule '{}' cannot be used as both an external token and a non-terminal rule",
+                    &variables[symbol.index].name,
+                ));
            }

            if symbol.is_external() {
@ -176,59 +126,22 @@ pub(super) fn extract_tokens(
                });
            }
        } else {
-            Err(ExtractTokensError::NonSymbolExternalToken)?;
+            return Err(anyhow!(
+                "Non-symbol rules cannot be used as external tokens"
+            ));
        }
    }

-    let word_token = if let Some(token) = grammar.word_token {
+    let mut word_token = None;
+    if let Some(token) = grammar.word_token {
        let token = symbol_replacer.replace_symbol(token);
        if token.is_non_terminal() {
-            let word_token_variable = &variables[token.index];
-            let conflicting_symbol_name = variables
-                .iter()
-                .enumerate()
-                .find(|(i, v)| *i != token.index && v.rule == word_token_variable.rule)
-                .map(|(_, v)| v.name.clone());
-
-            Err(ExtractTokensError::WordToken(NonTerminalWordTokenError {
-                symbol_name: word_token_variable.name.clone(),
-                conflicting_symbol_name,
-            }))?;
+            return Err(anyhow!(
+                "Non-terminal symbol '{}' cannot be used as the word token",
+                &variables[token.index].name
+            ));
        }
-        Some(token)
-    } else {
-        None
-    };
-
-    let mut reserved_word_contexts = Vec::with_capacity(grammar.reserved_word_sets.len());
-    for reserved_word_context in grammar.reserved_word_sets {
-        let mut reserved_words = Vec::with_capacity(reserved_word_contexts.len());
-        for reserved_rule in reserved_word_context.reserved_words {
-            if let Rule::Symbol(symbol) = reserved_rule {
-                reserved_words.push(symbol_replacer.replace_symbol(symbol));
-            } else if let Some(index) = lexical_variables
-                .iter()
-                .position(|v| v.rule == reserved_rule)
-            {
-                reserved_words.push(Symbol::terminal(index));
-            } else {
-                let rule = if let Rule::Metadata { rule, .. } = &reserved_rule {
-                    rule.as_ref()
-                } else {
-                    &reserved_rule
-                };
-                let token_name = match rule {
-                    Rule::String(s) => s.clone(),
-                    Rule::Pattern(p, _) => p.clone(),
-                    _ => "unknown".to_string(),
-                };
-                Err(ExtractTokensError::NonTokenReservedWord(token_name))?;
-            }
-        }
-        reserved_word_contexts.push(ReservedWordContext {
-            name: reserved_word_context.name,
-            reserved_words,
-        });
+        word_token = Some(token);
    }

    Ok((
@ -241,7 +154,6 @@ pub(super) fn extract_tokens(
            external_tokens,
            word_token,
            precedence_orderings: grammar.precedence_orderings,
-            reserved_word_sets: reserved_word_contexts,
        },
        ExtractedLexicalGrammar {
            variables: lexical_variables,
@ -253,7 +165,6 @@ pub(super) fn extract_tokens(
 struct TokenExtractor {
    current_variable_name: String,
    current_variable_token_count: usize,
-    is_first_rule: bool,
    extracted_variables: Vec<Variable>,
    extracted_usage_counts: Vec<usize>,
 }
@ -263,33 +174,28 @@ struct SymbolReplacer {
 }

 impl TokenExtractor {
-    fn extract_tokens_in_variable(
-        &mut self,
-        is_first: bool,
-        variable: &mut Variable,
-    ) -> ExtractTokensResult<()> {
+    fn extract_tokens_in_variable(&mut self, variable: &mut Variable) {
        self.current_variable_name.clear();
        self.current_variable_name.push_str(&variable.name);
        self.current_variable_token_count = 0;
-        self.is_first_rule = is_first;
-        variable.rule = self.extract_tokens_in_rule(&variable.rule)?;
-        Ok(())
+        let mut rule = Rule::Blank;
+        mem::swap(&mut rule, &mut variable.rule);
+        variable.rule = self.extract_tokens_in_rule(&rule);
    }

-    fn extract_tokens_in_rule(&mut self, input: &Rule) -> ExtractTokensResult<Rule> {
+    fn extract_tokens_in_rule(&mut self, input: &Rule) -> Rule {
        match input {
-            Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()),
-            Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()),
+            Rule::String(name) => self.extract_token(input, Some(name)).into(),
+            Rule::Pattern(..) => self.extract_token(input, None).into(),
            Rule::Metadata { params, rule } => {
                if params.is_token {
                    let mut params = params.clone();
                    params.is_token = false;

-                    let string_value = if let Rule::String(value) = rule.as_ref() {
-                        Some(value)
-                    } else {
-                        None
-                    };
+                    let mut string_value = None;
+                    if let Rule::String(value) = rule.as_ref() {
+                        string_value = Some(value);
+                    }

                    let rule_to_extract = if params == MetadataParams::default() {
                        rule.as_ref()
@ -297,56 +203,41 @@ impl TokenExtractor {
                        input
                    };

-                    Ok(self.extract_token(rule_to_extract, string_value)?.into())
+                    self.extract_token(rule_to_extract, string_value).into()
                } else {
-                    Ok(Rule::Metadata {
+                    Rule::Metadata {
                        params: params.clone(),
-                        rule: Box::new(self.extract_tokens_in_rule(rule)?),
-                    })
+                        rule: Box::new(self.extract_tokens_in_rule(rule)),
+                    }
                }
            }
-            Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(
-                self.extract_tokens_in_rule(content)?,
-            ))),
-            Rule::Seq(elements) => Ok(Rule::Seq(
+            Rule::Repeat(content) => Rule::Repeat(Box::new(self.extract_tokens_in_rule(content))),
+            Rule::Seq(elements) => Rule::Seq(
                elements
                    .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
-                    .collect::<ExtractTokensResult<Vec<_>>>()?,
-            )),
-            Rule::Choice(elements) => Ok(Rule::Choice(
+                    .collect(),
+            ),
+            Rule::Choice(elements) => Rule::Choice(
                elements
                    .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
-                    .collect::<ExtractTokensResult<Vec<_>>>()?,
-            )),
-            Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
-                rule: Box::new(self.extract_tokens_in_rule(rule)?),
-                context_name: context_name.clone(),
-            }),
-            _ => Ok(input.clone()),
+                    .collect(),
+            ),
+            _ => input.clone(),
        }
    }

-    fn extract_token(
-        &mut self,
-        rule: &Rule,
-        string_value: Option<&String>,
-    ) -> ExtractTokensResult<Symbol> {
+    fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Symbol {
        for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
            if variable.rule == *rule {
                self.extracted_usage_counts[i] += 1;
-                return Ok(Symbol::terminal(i));
+                return Symbol::terminal(i);
            }
        }

        let index = self.extracted_variables.len();
        let variable = if let Some(string_value) = string_value {
-            if string_value.is_empty() && !self.is_first_rule {
-                Err(ExtractTokensError::EmptyString(
-                    self.current_variable_name.clone(),
-                ))?;
-            }
            Variable {
                name: string_value.clone(),
                kind: VariableType::Anonymous,
@ -357,7 +248,7 @@ impl TokenExtractor {
            Variable {
                name: format!(
                    "{}_token{}",
-                    self.current_variable_name, self.current_variable_token_count
+                    &self.current_variable_name, self.current_variable_token_count
                ),
                kind: VariableType::Auxiliary,
                rule: rule.clone(),
@ -366,7 +257,7 @@ impl TokenExtractor {

        self.extracted_variables.push(variable);
        self.extracted_usage_counts.push(1);
-        Ok(Symbol::terminal(index))
+        Symbol::terminal(index)
    }
 }

@ -391,10 +282,6 @@ impl SymbolReplacer {
                params: params.clone(),
                rule: Box::new(self.replace_symbols_in_rule(rule)),
            },
-            Rule::Reserved { rule, context_name } => Rule::Reserved {
-                rule: Box::new(self.replace_symbols_in_rule(rule)),
-                context_name: context_name.clone(),
-            },
            _ => rule.clone(),
        }
    }
@ -422,6 +309,7 @@ impl SymbolReplacer {
 #[cfg(test)]
 mod test {
    use super::*;
+    use crate::generate::grammars::VariableType;

    #[test]
    fn test_extraction() {
@ -590,48 +478,14 @@ mod test {
        ]);
        grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];

-        let result = extract_tokens(grammar);
-        assert!(result.is_err(), "Expected an error but got no error");
-        let err = result.err().unwrap();
-        assert_eq!(
-            err.to_string(),
-            "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule"
-        );
-    }
-
-    #[test]
-    fn test_extraction_on_hidden_terminal() {
-        let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
-            Variable::named("rule_0", Rule::non_terminal(1)),
-            Variable::hidden("_rule_1", Rule::string("a")),
-        ]))
-        .unwrap();
-
-        // The rule `_rule_1` should not "absorb" the
-        // terminal "a", since it is hidden,
-        // so we expect two variables still
-        assert_eq!(
-            syntax_grammar.variables,
-            vec![
-                Variable::named("rule_0", Rule::non_terminal(1)),
-                Variable::hidden("_rule_1", Rule::terminal(0)),
-            ]
-        );
-
-        // We should not have a hidden rule in our lexical grammar, only the terminal "a"
-        assert_eq!(
-            lexical_grammar.variables,
-            vec![Variable::anonymous("a", Rule::string("a"))]
-        );
-    }
-
-    #[test]
-    fn test_extraction_with_empty_string() {
-        assert!(extract_tokens(build_grammar(vec![
-            Variable::named("rule_0", Rule::non_terminal(1)),
-            Variable::hidden("_rule_1", Rule::string("")),
-        ]))
-        .is_err());
+        match extract_tokens(grammar) {
+            Err(e) => {
+                assert_eq!(e.to_string(), "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
+            }
+            _ => {
+                panic!("Expected an error but got no error");
+            }
+        }
    }

    fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
--- a/crates/generate/src/prepare_grammar/flatten_grammar.rs
+++ b/crates/generate/src/prepare_grammar/flatten_grammar.rs
@ -1,96 +1,46 @@
-use std::collections::HashMap;
-
-use serde::Serialize;
-use thiserror::Error;
-
 use super::ExtractedSyntaxGrammar;
-use crate::{
-    grammars::{
-        Production, ProductionStep, ReservedWordSetId, SyntaxGrammar, SyntaxVariable, Variable,
-    },
-    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
+use crate::generate::grammars::{
+    Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
 };
-
-pub type FlattenGrammarResult<T> = Result<T, FlattenGrammarError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum FlattenGrammarError {
-    #[error("No such reserved word set: {0}")]
-    NoReservedWordSet(String),
-    #[error(
-        "The rule `{0}` matches the empty string.
-
-Tree-sitter does not support syntactic rules that match the empty string
-unless they are used only as the grammar's start rule.
-"
-    )]
-    EmptyString(String),
-    #[error("Rule `{0}` cannot be inlined because it contains a reference to itself")]
-    RecursiveInline(String),
-}
+use crate::generate::rules::{Alias, Associativity, Precedence, Rule, Symbol};
+use anyhow::{anyhow, Result};

 struct RuleFlattener {
    production: Production,
-    reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
    precedence_stack: Vec<Precedence>,
    associativity_stack: Vec<Associativity>,
-    reserved_word_stack: Vec<ReservedWordSetId>,
    alias_stack: Vec<Alias>,
    field_name_stack: Vec<String>,
 }

 impl RuleFlattener {
-    const fn new(reserved_word_set_ids: HashMap<String, ReservedWordSetId>) -> Self {
+    fn new() -> Self {
        Self {
            production: Production {
                steps: Vec::new(),
                dynamic_precedence: 0,
            },
-            reserved_word_set_ids,
            precedence_stack: Vec::new(),
            associativity_stack: Vec::new(),
-            reserved_word_stack: Vec::new(),
            alias_stack: Vec::new(),
            field_name_stack: Vec::new(),
        }
    }

-    fn flatten_variable(&mut self, variable: Variable) -> FlattenGrammarResult<SyntaxVariable> {
-        let choices = extract_choices(variable.rule);
-        let mut productions = Vec::with_capacity(choices.len());
-        for rule in choices {
-            let production = self.flatten_rule(rule)?;
-            if !productions.contains(&production) {
-                productions.push(production);
-            }
-        }
-        Ok(SyntaxVariable {
-            name: variable.name,
-            kind: variable.kind,
-            productions,
-        })
+    fn flatten(mut self, rule: Rule) -> Production {
+        self.apply(rule, true);
+        self.production
    }

-    fn flatten_rule(&mut self, rule: Rule) -> FlattenGrammarResult<Production> {
-        self.production = Production::default();
-        self.alias_stack.clear();
-        self.reserved_word_stack.clear();
-        self.precedence_stack.clear();
-        self.associativity_stack.clear();
-        self.field_name_stack.clear();
-        self.apply(rule, true)?;
-        Ok(self.production.clone())
-    }
-
-    fn apply(&mut self, rule: Rule, at_end: bool) -> FlattenGrammarResult<bool> {
+    fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
        match rule {
            Rule::Seq(members) => {
                let mut result = false;
                let last_index = members.len() - 1;
                for (i, member) in members.into_iter().enumerate() {
-                    result |= self.apply(member, i == last_index && at_end)?;
+                    result |= self.apply(member, i == last_index && at_end);
                }
-                Ok(result)
+                result
            }
            Rule::Metadata { rule, params } => {
                let mut has_precedence = false;
@ -121,7 +71,7 @@ impl RuleFlattener {
                    self.production.dynamic_precedence = params.dynamic_precedence;
                }

-                let did_push = self.apply(*rule, at_end)?;
+                let did_push = self.apply(*rule, at_end);

                if has_precedence {
                    self.precedence_stack.pop();
@ -150,20 +100,7 @@ impl RuleFlattener {
                    self.field_name_stack.pop();
                }

-                Ok(did_push)
-            }
-            Rule::Reserved { rule, context_name } => {
-                self.reserved_word_stack.push(
-                    self.reserved_word_set_ids
-                        .get(&context_name)
-                        .copied()
-                        .ok_or_else(|| {
-                            FlattenGrammarError::NoReservedWordSet(context_name.clone())
-                        })?,
-                );
-                let did_push = self.apply(*rule, at_end)?;
-                self.reserved_word_stack.pop();
-                Ok(did_push)
+                did_push
            }
            Rule::Symbol(symbol) => {
                self.production.steps.push(ProductionStep {
@ -174,17 +111,12 @@ impl RuleFlattener {
                        .cloned()
                        .unwrap_or(Precedence::None),
                    associativity: self.associativity_stack.last().copied(),
-                    reserved_word_set_id: self
-                        .reserved_word_stack
-                        .last()
-                        .copied()
-                        .unwrap_or(ReservedWordSetId::default()),
                    alias: self.alias_stack.last().cloned(),
                    field_name: self.field_name_stack.last().cloned(),
                });
-                Ok(true)
+                true
            }
-            _ => Ok(false),
+            _ => false,
        }
    }
 }
@ -195,7 +127,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
            let mut result = vec![Rule::Blank];
            for element in elements {
                let extraction = extract_choices(element);
-                let mut next_result = Vec::with_capacity(result.len());
+                let mut next_result = Vec::new();
                for entry in result {
                    for extraction_entry in &extraction {
                        next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
@ -206,7 +138,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
            result
        }
        Rule::Choice(elements) => {
-            let mut result = Vec::with_capacity(elements.len());
+            let mut result = Vec::new();
            for element in elements {
                for rule in extract_choices(element) {
                    result.push(rule);
@ -221,17 +153,25 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
                params: params.clone(),
            })
            .collect(),
-        Rule::Reserved { rule, context_name } => extract_choices(*rule)
-            .into_iter()
-            .map(|rule| Rule::Reserved {
-                rule: Box::new(rule),
-                context_name: context_name.clone(),
-            })
-            .collect(),
        _ => vec![rule],
    }
 }

+fn flatten_variable(variable: Variable) -> SyntaxVariable {
+    let mut productions = Vec::new();
+    for rule in extract_choices(variable.rule) {
+        let production = RuleFlattener::new().flatten(rule);
+        if !productions.contains(&production) {
+            productions.push(production);
+        }
+    }
+    SyntaxVariable {
+        name: variable.name,
+        kind: variable.kind,
+        productions,
+    }
+}
+
 fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    for variable in variables {
        for production in &variable.productions {
@ -245,48 +185,25 @@ fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    false
 }

-pub(super) fn flatten_grammar(
-    grammar: ExtractedSyntaxGrammar,
-) -> FlattenGrammarResult<SyntaxGrammar> {
-    let mut reserved_word_set_ids_by_name = HashMap::new();
-    for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
-        reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
+pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
+    let mut variables = Vec::new();
+    for variable in grammar.variables {
+        variables.push(flatten_variable(variable));
    }
-
-    let mut flattener = RuleFlattener::new(reserved_word_set_ids_by_name);
-    let variables = grammar
-        .variables
-        .into_iter()
-        .map(|variable| flattener.flatten_variable(variable))
-        .collect::<FlattenGrammarResult<Vec<_>>>()?;
-
    for (i, variable) in variables.iter().enumerate() {
-        let symbol = Symbol::non_terminal(i);
-        let used = symbol_is_used(&variables, symbol);
-
        for production in &variable.productions {
-            if used && production.steps.is_empty() {
-                Err(FlattenGrammarError::EmptyString(variable.name.clone()))?;
-            }
+            if production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) {
+                return Err(anyhow!(
+                    "The rule `{}` matches the empty string.

-            if grammar.variables_to_inline.contains(&symbol)
-                && production.steps.iter().any(|step| step.symbol == symbol)
-            {
-                Err(FlattenGrammarError::RecursiveInline(variable.name.clone()))?;
+Tree-sitter does not support syntactic rules that match the empty string
+unless they are used only as the grammar's start rule.
+",
+                    variable.name
+                ));
            }
        }
    }
-    let mut reserved_word_sets = grammar
-        .reserved_word_sets
-        .into_iter()
-        .map(|set| set.reserved_words.into_iter().collect())
-        .collect::<Vec<_>>();
-
-    // If no default reserved word set is specified, there are no reserved words.
-    if reserved_word_sets.is_empty() {
-        reserved_word_sets.push(TokenSet::default());
-    }
-
    Ok(SyntaxGrammar {
        extra_symbols: grammar.extra_symbols,
        expected_conflicts: grammar.expected_conflicts,
@ -295,7 +212,6 @@ pub(super) fn flatten_grammar(
        external_tokens: grammar.external_tokens,
        supertype_symbols: grammar.supertype_symbols,
        word_token: grammar.word_token,
-        reserved_word_sets,
        variables,
    })
 }
@ -303,35 +219,33 @@ pub(super) fn flatten_grammar(
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::grammars::VariableType;
+    use crate::generate::grammars::VariableType;
+    use crate::generate::rules::Symbol;

    #[test]
    fn test_flatten_grammar() {
-        let mut flattener = RuleFlattener::new(HashMap::default());
-        let result = flattener
-            .flatten_variable(Variable {
-                name: "test".to_string(),
-                kind: VariableType::Named,
-                rule: Rule::seq(vec![
-                    Rule::non_terminal(1),
-                    Rule::prec_left(
-                        Precedence::Integer(101),
-                        Rule::seq(vec![
-                            Rule::non_terminal(2),
-                            Rule::choice(vec![
-                                Rule::prec_right(
-                                    Precedence::Integer(102),
-                                    Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
-                                ),
-                                Rule::non_terminal(5),
-                            ]),
-                            Rule::non_terminal(6),
+        let result = flatten_variable(Variable {
+            name: "test".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::seq(vec![
+                Rule::non_terminal(1),
+                Rule::prec_left(
+                    Precedence::Integer(101),
+                    Rule::seq(vec![
+                        Rule::non_terminal(2),
+                        Rule::choice(vec![
+                            Rule::prec_right(
+                                Precedence::Integer(102),
+                                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
+                            ),
+                            Rule::non_terminal(5),
                        ]),
-                    ),
-                    Rule::non_terminal(7),
-                ]),
-            })
-            .unwrap();
+                        Rule::non_terminal(6),
+                    ]),
+                ),
+                Rule::non_terminal(7),
+            ]),
+        });

        assert_eq!(
            result.productions,
@ -368,31 +282,28 @@ mod tests {

    #[test]
    fn test_flatten_grammar_with_maximum_dynamic_precedence() {
-        let mut flattener = RuleFlattener::new(HashMap::default());
-        let result = flattener
-            .flatten_variable(Variable {
-                name: "test".to_string(),
-                kind: VariableType::Named,
-                rule: Rule::seq(vec![
-                    Rule::non_terminal(1),
-                    Rule::prec_dynamic(
-                        101,
-                        Rule::seq(vec![
-                            Rule::non_terminal(2),
-                            Rule::choice(vec![
-                                Rule::prec_dynamic(
-                                    102,
-                                    Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
-                                ),
-                                Rule::non_terminal(5),
-                            ]),
-                            Rule::non_terminal(6),
+        let result = flatten_variable(Variable {
+            name: "test".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::seq(vec![
+                Rule::non_terminal(1),
+                Rule::prec_dynamic(
+                    101,
+                    Rule::seq(vec![
+                        Rule::non_terminal(2),
+                        Rule::choice(vec![
+                            Rule::prec_dynamic(
+                                102,
+                                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
+                            ),
+                            Rule::non_terminal(5),
                        ]),
-                    ),
-                    Rule::non_terminal(7),
-                ]),
-            })
-            .unwrap();
+                        Rule::non_terminal(6),
+                    ]),
+                ),
+                Rule::non_terminal(7),
+            ]),
+        });

        assert_eq!(
            result.productions,
@ -424,17 +335,14 @@ mod tests {

    #[test]
    fn test_flatten_grammar_with_final_precedence() {
-        let mut flattener = RuleFlattener::new(HashMap::default());
-        let result = flattener
-            .flatten_variable(Variable {
-                name: "test".to_string(),
-                kind: VariableType::Named,
-                rule: Rule::prec_left(
-                    Precedence::Integer(101),
-                    Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
-                ),
-            })
-            .unwrap();
+        let result = flatten_variable(Variable {
+            name: "test".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::prec_left(
+                Precedence::Integer(101),
+                Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
+            ),
+        });

        assert_eq!(
            result.productions,
@ -449,16 +357,14 @@ mod tests {
            }]
        );

-        let result = flattener
-            .flatten_variable(Variable {
-                name: "test".to_string(),
-                kind: VariableType::Named,
-                rule: Rule::prec_left(
-                    Precedence::Integer(101),
-                    Rule::seq(vec![Rule::non_terminal(1)]),
-                ),
-            })
-            .unwrap();
+        let result = flatten_variable(Variable {
+            name: "test".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::prec_left(
+                Precedence::Integer(101),
+                Rule::seq(vec![Rule::non_terminal(1)]),
+            ),
+        });

        assert_eq!(
            result.productions,
@ -472,21 +378,18 @@ mod tests {

    #[test]
    fn test_flatten_grammar_with_field_names() {
-        let mut flattener = RuleFlattener::new(HashMap::default());
-        let result = flattener
-            .flatten_variable(Variable {
-                name: "test".to_string(),
-                kind: VariableType::Named,
-                rule: Rule::seq(vec![
-                    Rule::field("first-thing".to_string(), Rule::terminal(1)),
-                    Rule::terminal(2),
-                    Rule::choice(vec![
-                        Rule::Blank,
-                        Rule::field("second-thing".to_string(), Rule::terminal(3)),
-                    ]),
+        let result = flatten_variable(Variable {
+            name: "test".to_string(),
+            kind: VariableType::Named,
+            rule: Rule::seq(vec![
+                Rule::field("first-thing".to_string(), Rule::terminal(1)),
+                Rule::terminal(2),
+                Rule::choice(vec![
+                    Rule::Blank,
+                    Rule::field("second-thing".to_string(), Rule::terminal(3)),
                ]),
-            })
-            .unwrap();
+            ]),
+        });

        assert_eq!(
            result.productions,
@ -509,32 +412,4 @@ mod tests {
            ]
        );
    }
-
-    #[test]
-    fn test_flatten_grammar_with_recursive_inline_variable() {
-        let result = flatten_grammar(ExtractedSyntaxGrammar {
-            extra_symbols: Vec::new(),
-            expected_conflicts: Vec::new(),
-            variables_to_inline: vec![Symbol::non_terminal(0)],
-            precedence_orderings: Vec::new(),
-            external_tokens: Vec::new(),
-            supertype_symbols: Vec::new(),
-            word_token: None,
-            reserved_word_sets: Vec::new(),
-            variables: vec![Variable {
-                name: "test".to_string(),
-                kind: VariableType::Named,
-                rule: Rule::seq(vec![
-                    Rule::non_terminal(0),
-                    Rule::non_terminal(1),
-                    Rule::non_terminal(2),
-                ]),
-            }],
-        });
-
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "Rule `test` cannot be inlined because it contains a reference to itself",
-        );
-    }
 }
--- a/crates/generate/src/prepare_grammar/intern_symbols.rs
+++ b/crates/generate/src/prepare_grammar/intern_symbols.rs
@ -1,34 +1,13 @@
-use log::warn;
-use serde::Serialize;
-use thiserror::Error;
-
 use super::InternedGrammar;
-use crate::{
-    grammars::{InputGrammar, ReservedWordContext, Variable, VariableType},
-    rules::{Rule, Symbol},
-};
+use crate::generate::grammars::{InputGrammar, Variable, VariableType};
+use crate::generate::rules::{Rule, Symbol};
+use anyhow::{anyhow, Result};

-pub type InternSymbolsResult<T> = Result<T, InternSymbolsError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum InternSymbolsError {
-    #[error("A grammar's start rule must be visible.")]
-    HiddenStartRule,
-    #[error("Undefined symbol `{0}`")]
-    Undefined(String),
-    #[error("Undefined symbol `{0}` in grammar's supertypes array")]
-    UndefinedSupertype(String),
-    #[error("Undefined symbol `{0}` in grammar's conflicts array")]
-    UndefinedConflict(String),
-    #[error("Undefined symbol `{0}` as grammar's word token")]
-    UndefinedWordToken(String),
-}
-
-pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
+pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
    let interner = Interner { grammar };

    if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
-        Err(InternSymbolsError::HiddenStartRule)?;
+        return Err(anyhow!("A grammar's start rule must be visible."));
    }

    let mut variables = Vec::with_capacity(grammar.variables.len());
@ -36,13 +15,13 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte
        variables.push(Variable {
            name: variable.name.clone(),
            kind: variable_type_for_name(&variable.name),
-            rule: interner.intern_rule(&variable.rule, Some(&variable.name))?,
+            rule: interner.intern_rule(&variable.rule)?,
        });
    }

    let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
    for external_token in &grammar.external_tokens {
-        let rule = interner.intern_rule(external_token, None)?;
+        let rule = interner.intern_rule(external_token)?;
        let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
            (name.clone(), variable_type_for_name(name))
        } else {
@ -53,36 +32,26 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte

    let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
    for extra_token in &grammar.extra_symbols {
-        extra_symbols.push(interner.intern_rule(extra_token, None)?);
+        extra_symbols.push(interner.intern_rule(extra_token)?);
    }

    let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
    for supertype_symbol_name in &grammar.supertype_symbols {
-        supertype_symbols.push(interner.intern_name(supertype_symbol_name).ok_or_else(|| {
-            InternSymbolsError::UndefinedSupertype(supertype_symbol_name.clone())
-        })?);
+        supertype_symbols.push(
+            interner
+                .intern_name(supertype_symbol_name)
+                .ok_or_else(|| anyhow!("Undefined symbol `{supertype_symbol_name}`"))?,
+        );
    }

-    let mut reserved_words = Vec::with_capacity(grammar.reserved_words.len());
-    for reserved_word_set in &grammar.reserved_words {
-        let mut interned_set = Vec::with_capacity(reserved_word_set.reserved_words.len());
-        for rule in &reserved_word_set.reserved_words {
-            interned_set.push(interner.intern_rule(rule, None)?);
-        }
-        reserved_words.push(ReservedWordContext {
-            name: reserved_word_set.name.clone(),
-            reserved_words: interned_set,
-        });
-    }
-
-    let mut expected_conflicts = Vec::with_capacity(grammar.expected_conflicts.len());
+    let mut expected_conflicts = Vec::new();
    for conflict in &grammar.expected_conflicts {
        let mut interned_conflict = Vec::with_capacity(conflict.len());
        for name in conflict {
            interned_conflict.push(
                interner
                    .intern_name(name)
-                    .ok_or_else(|| InternSymbolsError::UndefinedConflict(name.clone()))?,
+                    .ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?,
            );
        }
        expected_conflicts.push(interned_conflict);
@ -95,15 +64,14 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte
        }
    }

-    let word_token = if let Some(name) = grammar.word_token.as_ref() {
-        Some(
+    let mut word_token = None;
+    if let Some(name) = grammar.word_token.as_ref() {
+        word_token = Some(
            interner
                .intern_name(name)
-                .ok_or_else(|| InternSymbolsError::UndefinedWordToken(name.clone()))?,
-        )
-    } else {
-        None
-    };
+                .ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?,
+        );
+    }

    for (i, variable) in variables.iter_mut().enumerate() {
        if supertype_symbols.contains(&Symbol::non_terminal(i)) {
@ -120,7 +88,6 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<Inte
        supertype_symbols,
        word_token,
        precedence_orderings: grammar.precedence_orderings.clone(),
-        reserved_word_sets: reserved_words,
    })
 }

@ -128,38 +95,34 @@ struct Interner<'a> {
    grammar: &'a InputGrammar,
 }

-impl Interner<'_> {
-    fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult<Rule> {
+impl<'a> Interner<'a> {
+    fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
        match rule {
            Rule::Choice(elements) => {
-                self.check_single(elements, name, "choice");
                let mut result = Vec::with_capacity(elements.len());
                for element in elements {
-                    result.push(self.intern_rule(element, name)?);
+                    result.push(self.intern_rule(element)?);
                }
                Ok(Rule::Choice(result))
            }
            Rule::Seq(elements) => {
-                self.check_single(elements, name, "seq");
                let mut result = Vec::with_capacity(elements.len());
                for element in elements {
-                    result.push(self.intern_rule(element, name)?);
+                    result.push(self.intern_rule(element)?);
                }
                Ok(Rule::Seq(result))
            }
-            Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content, name)?))),
+            Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
            Rule::Metadata { rule, params } => Ok(Rule::Metadata {
-                rule: Box::new(self.intern_rule(rule, name)?),
+                rule: Box::new(self.intern_rule(rule)?),
                params: params.clone(),
            }),
-            Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
-                rule: Box::new(self.intern_rule(rule, name)?),
-                context_name: context_name.clone(),
-            }),
+
            Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
-                || Err(InternSymbolsError::Undefined(name.clone())),
+                || Err(anyhow!("Undefined symbol `{name}`")),
                |symbol| Ok(Rule::Symbol(symbol)),
            ),
+
            _ => Ok(rule.clone()),
        }
    }
@ -181,17 +144,6 @@ impl Interner<'_> {

        None
    }
-
-    // In the case of a seq or choice rule of 1 element in a hidden rule, weird
-    // inconsistent behavior with queries can occur. So we should warn the user about it.
-    fn check_single(&self, elements: &[Rule], name: Option<&str>, kind: &str) {
-        if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) {
-            warn!(
-                "rule {} contains a `{kind}` rule with a single element. This is unnecessary.",
-                name.unwrap_or_default()
-            );
-        }
-    }
 }

 fn variable_type_for_name(name: &str) -> VariableType {
@ -278,9 +230,10 @@ mod tests {
    fn test_grammar_with_undefined_symbols() {
        let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));

-        assert!(result.is_err(), "Expected an error but got none");
-        let e = result.err().unwrap();
-        assert_eq!(e.to_string(), "Undefined symbol `y`");
+        match result {
+            Err(e) => assert_eq!(e.to_string(), "Undefined symbol `y`"),
+            _ => panic!("Expected an error but got none"),
+        }
    }

    fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
--- a/cli/src/generate/prepare_grammar/mod.rs
+++ b/cli/src/generate/prepare_grammar/mod.rs
@ -6,36 +6,26 @@ mod flatten_grammar;
 mod intern_symbols;
 mod process_inlines;

+pub use self::expand_tokens::expand_tokens;
+
+use self::expand_repeats::expand_repeats;
+use self::extract_default_aliases::extract_default_aliases;
+use self::extract_tokens::extract_tokens;
+use self::flatten_grammar::flatten_grammar;
+use self::intern_symbols::intern_symbols;
+use self::process_inlines::process_inlines;
+use super::grammars::{
+    ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
+    SyntaxGrammar, Variable,
+};
+use super::rules::{AliasMap, Precedence, Rule, Symbol};
+use anyhow::{anyhow, Result};
 use std::{
    cmp::Ordering,
-    collections::{hash_map, BTreeSet, HashMap, HashSet},
+    collections::{hash_map, HashMap, HashSet},
    mem,
 };

-pub use expand_tokens::ExpandTokensError;
-pub use extract_tokens::ExtractTokensError;
-pub use flatten_grammar::FlattenGrammarError;
-use indexmap::IndexMap;
-pub use intern_symbols::InternSymbolsError;
-pub use process_inlines::ProcessInlinesError;
-use serde::Serialize;
-use thiserror::Error;
-
-pub use self::expand_tokens::expand_tokens;
-use self::{
-    expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases,
-    extract_tokens::extract_tokens, flatten_grammar::flatten_grammar,
-    intern_symbols::intern_symbols, process_inlines::process_inlines,
-};
-use super::{
-    grammars::{
-        ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
-        SyntaxGrammar, Variable,
-    },
-    rules::{AliasMap, Precedence, Rule, Symbol},
-};
-use crate::grammars::ReservedWordContext;
-
 pub struct IntermediateGrammar<T, U> {
    variables: Vec<Variable>,
    extra_symbols: Vec<T>,
@ -45,7 +35,6 @@ pub struct IntermediateGrammar<T, U> {
    variables_to_inline: Vec<Symbol>,
    supertype_symbols: Vec<Symbol>,
    word_token: Option<Symbol>,
-    reserved_word_sets: Vec<ReservedWordContext<T>>,
 }

 pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
@ -69,96 +58,21 @@ impl<T, U> Default for IntermediateGrammar<T, U> {
            variables_to_inline: Vec::default(),
            supertype_symbols: Vec::default(),
            word_token: Option::default(),
-            reserved_word_sets: Vec::default(),
        }
    }
 }

-pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
-
-#[derive(Debug, Error, Serialize)]
-#[error(transparent)]
-pub enum PrepareGrammarError {
-    ValidatePrecedences(#[from] ValidatePrecedenceError),
-    ValidateIndirectRecursion(#[from] IndirectRecursionError),
-    InternSymbols(#[from] InternSymbolsError),
-    ExtractTokens(#[from] ExtractTokensError),
-    FlattenGrammar(#[from] FlattenGrammarError),
-    ExpandTokens(#[from] ExpandTokensError),
-    ProcessInlines(#[from] ProcessInlinesError),
-}
-
-pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;
-
-#[derive(Debug, Error, Serialize)]
-#[error(transparent)]
-pub enum ValidatePrecedenceError {
-    Undeclared(#[from] UndeclaredPrecedenceError),
-    Ordering(#[from] ConflictingPrecedenceOrderingError),
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct IndirectRecursionError(pub Vec<String>);
-
-impl std::fmt::Display for IndirectRecursionError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Grammar contains an indirectly recursive rule: ")?;
-        for (i, symbol) in self.0.iter().enumerate() {
-            if i > 0 {
-                write!(f, " -> ")?;
-            }
-            write!(f, "{symbol}")?;
-        }
-        Ok(())
-    }
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct UndeclaredPrecedenceError {
-    pub precedence: String,
-    pub rule: String,
-}
-
-impl std::fmt::Display for UndeclaredPrecedenceError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "Undeclared precedence '{}' in rule '{}'",
-            self.precedence, self.rule
-        )?;
-        Ok(())
-    }
-}
-
-#[derive(Debug, Error, Serialize)]
-pub struct ConflictingPrecedenceOrderingError {
-    pub precedence_1: String,
-    pub precedence_2: String,
-}
-
-impl std::fmt::Display for ConflictingPrecedenceOrderingError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "Conflicting orderings for precedences {} and {}",
-            self.precedence_1, self.precedence_2
-        )?;
-        Ok(())
-    }
-}
-
 /// Transform an input grammar into separate components that are ready
 /// for parse table construction.
 pub fn prepare_grammar(
    input_grammar: &InputGrammar,
-) -> PrepareGrammarResult<(
+) -> Result<(
    SyntaxGrammar,
    LexicalGrammar,
    InlinedProductionMap,
    AliasMap,
 )> {
    validate_precedences(input_grammar)?;
-    validate_indirect_recursion(input_grammar)?;

    let interned_grammar = intern_symbols(input_grammar)?;
    let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
@ -170,115 +84,10 @@ pub fn prepare_grammar(
    Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
 }

-/// Check for indirect recursion cycles in the grammar that can cause infinite loops while
-/// parsing. An indirect recursion cycle occurs when a non-terminal can derive itself through
-/// a chain of single-symbol productions (e.g., A -> B, B -> A).
-fn validate_indirect_recursion(grammar: &InputGrammar) -> Result<(), IndirectRecursionError> {
-    let mut epsilon_transitions: IndexMap<&str, BTreeSet<String>> = IndexMap::new();
-
-    for variable in &grammar.variables {
-        let productions = get_single_symbol_productions(&variable.rule);
-        // Filter out rules that *directly* reference themselves, as this doesn't
-        // cause a parsing loop.
-        let filtered: BTreeSet<String> = productions
-            .into_iter()
-            .filter(|s| s != &variable.name)
-            .collect();
-        epsilon_transitions.insert(variable.name.as_str(), filtered);
-    }
-
-    for start_symbol in epsilon_transitions.keys() {
-        let mut visited = BTreeSet::new();
-        let mut path = Vec::new();
-        if let Some((start_idx, end_idx)) =
-            get_cycle(start_symbol, &epsilon_transitions, &mut visited, &mut path)
-        {
-            let cycle_symbols = path[start_idx..=end_idx]
-                .iter()
-                .map(|s| (*s).to_string())
-                .collect();
-            return Err(IndirectRecursionError(cycle_symbols));
-        }
-    }
-
-    Ok(())
-}
-
-fn get_single_symbol_productions(rule: &Rule) -> BTreeSet<String> {
-    match rule {
-        Rule::NamedSymbol(name) => BTreeSet::from([name.clone()]),
-        Rule::Choice(choices) => choices
-            .iter()
-            .flat_map(get_single_symbol_productions)
-            .collect(),
-        Rule::Metadata { rule, .. } => get_single_symbol_productions(rule),
-        _ => BTreeSet::new(),
-    }
-}
-
-/// Perform a depth-first search to detect cycles in single state transitions.
-fn get_cycle<'a>(
-    current: &'a str,
-    transitions: &'a IndexMap<&'a str, BTreeSet<String>>,
-    visited: &mut BTreeSet<&'a str>,
-    path: &mut Vec<&'a str>,
-) -> Option<(usize, usize)> {
-    if let Some(first_idx) = path.iter().position(|s| *s == current) {
-        path.push(current);
-        return Some((first_idx, path.len() - 1));
-    }
-
-    if visited.contains(current) {
-        return None;
-    }
-
-    path.push(current);
-    visited.insert(current);
-
-    if let Some(next_symbols) = transitions.get(current) {
-        for next in next_symbols {
-            if let Some(cycle) = get_cycle(next, transitions, visited, path) {
-                return Some(cycle);
-            }
-        }
-    }
-
-    path.pop();
-    None
-}
-
 /// Check that all of the named precedences used in the grammar are declared
 /// within the `precedences` lists, and also that there are no conflicting
 /// precedence orderings declared in those lists.
-fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
-    // Check that no rule contains a named precedence that is not present in
-    // any of the `precedences` lists.
-    fn validate(
-        rule_name: &str,
-        rule: &Rule,
-        names: &HashSet<&String>,
-    ) -> ValidatePrecedenceResult<()> {
-        match rule {
-            Rule::Repeat(rule) => validate(rule_name, rule, names),
-            Rule::Seq(elements) | Rule::Choice(elements) => elements
-                .iter()
-                .try_for_each(|e| validate(rule_name, e, names)),
-            Rule::Metadata { rule, params } => {
-                if let Precedence::Name(n) = &params.precedence {
-                    if !names.contains(n) {
-                        Err(UndeclaredPrecedenceError {
-                            precedence: n.clone(),
-                            rule: rule_name.to_string(),
-                        })?;
-                    }
-                }
-                validate(rule_name, rule, names)?;
-                Ok(())
-            }
-            _ => Ok(()),
-        }
-    }
-
+fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
    // For any two precedence names `a` and `b`, if `a` comes before `b`
    // in some list, then it cannot come *after* `b` in any list.
    let mut pairs = HashMap::new();
@ -299,10 +108,9 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()>
                    }
                    hash_map::Entry::Occupied(e) => {
                        if e.get() != &ordering {
-                            Err(ConflictingPrecedenceOrderingError {
-                                precedence_1: entry1.to_string(),
-                                precedence_2: entry2.to_string(),
-                            })?;
+                            return Err(anyhow!(
+                                "Conflicting orderings for precedences {entry1} and {entry2}",
+                            ));
                        }
                    }
                }
@ -310,6 +118,27 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()>
        }
    }

+    // Recursively walks `rule` (part of the rule called `rule_name`) and fails
+    // on the first named precedence that is not contained in `names`, i.e.
+    // that is not present in any of the `precedences` lists. Rule kinds with
+    // no nested content are accepted unchanged.
+    fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
+        match rule {
+            Rule::Repeat(inner) => validate(rule_name, inner, names),
+            Rule::Seq(members) | Rule::Choice(members) => {
+                // Stop at the first member that fails validation.
+                for member in members {
+                    validate(rule_name, member, names)?;
+                }
+                Ok(())
+            }
+            Rule::Metadata { rule: inner, params } => match &params.precedence {
+                // A named precedence has to be declared before it is used.
+                Precedence::Name(n) if !names.contains(n) => {
+                    Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'"))
+                }
+                // Declared or non-named precedence: keep checking the wrapped rule.
+                _ => validate(rule_name, inner, names),
+            },
+            _ => Ok(()),
+        }
+    }
+
    let precedence_names = grammar
        .precedence_orderings
        .iter()
@ -332,7 +161,7 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()>
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::grammars::VariableType;
+    use crate::generate::grammars::{InputGrammar, Variable, VariableType};

    #[test]
    fn test_validate_precedences_with_undeclared_precedence() {
--- a/crates/generate/src/prepare_grammar/process_inlines.rs
+++ b/crates/generate/src/prepare_grammar/process_inlines.rs
@ -1,17 +1,14 @@
-use std::collections::HashMap;
-
-use serde::Serialize;
-use thiserror::Error;
-
-use crate::{
+use crate::generate::{
    grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
    rules::SymbolType,
 };
+use anyhow::{anyhow, Result};
+use std::collections::HashMap;

 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 struct ProductionStepId {
    // A `None` value here means that the production itself was produced via inlining,
-    // and is stored in the builder's `productions` vector, as opposed to being
+    // and is stored in the builder's `productions` vector, as opposed to being
    // stored in one of the grammar's variables.
    variable_index: Option<usize>,
    production_index: usize,
@ -70,13 +67,12 @@ impl InlinedProductionMapBuilder {
        let production_map = production_indices_by_step_id
            .into_iter()
            .map(|(step_id, production_indices)| {
-                let production =
-                    core::ptr::from_ref::<Production>(step_id.variable_index.map_or_else(
-                        || &productions[step_id.production_index],
-                        |variable_index| {
-                            &grammar.variables[variable_index].productions[step_id.production_index]
-                        },
-                    ));
+                let production = step_id.variable_index.map_or_else(
+                    || &productions[step_id.production_index],
+                    |variable_index| {
+                        &grammar.variables[variable_index].productions[step_id.production_index]
+                    },
+                ) as *const Production;
                ((production, step_id.step_index as u32), production_indices)
            })
            .collect();
@ -156,7 +152,7 @@ impl InlinedProductionMapBuilder {
                self.productions
                    .iter()
                    .position(|p| *p == production)
-                    .unwrap_or_else(|| {
+                    // Only append when no equal production exists yet. The fallback must
+                    // be a closure: `unwrap_or` evaluates its argument eagerly and would
+                    // push a duplicate even when `position` already found a match.
+                    .unwrap_or_else(|| {
                        self.productions.push(production);
                        self.productions.len() - 1
                    })
@ -189,38 +185,29 @@ impl InlinedProductionMapBuilder {
    }
 }

-pub type ProcessInlinesResult<T> = Result<T, ProcessInlinesError>;
-
-#[derive(Debug, Error, Serialize)]
-pub enum ProcessInlinesError {
-    #[error("External token `{0}` cannot be inlined")]
-    ExternalToken(String),
-    #[error("Token `{0}` cannot be inlined")]
-    Token(String),
-    #[error("Rule `{0}` cannot be inlined because it is the first rule")]
-    FirstRule(String),
-}
-
 pub(super) fn process_inlines(
    grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
-) -> ProcessInlinesResult<InlinedProductionMap> {
+) -> Result<InlinedProductionMap> {
    for symbol in &grammar.variables_to_inline {
        match symbol.kind {
            SymbolType::External => {
-                Err(ProcessInlinesError::ExternalToken(
-                    grammar.external_tokens[symbol.index].name.clone(),
-                ))?;
+                return Err(anyhow!(
+                    "External token `{}` cannot be inlined",
+                    grammar.external_tokens[symbol.index].name
+                ))
            }
            SymbolType::Terminal => {
-                Err(ProcessInlinesError::Token(
-                    lexical_grammar.variables[symbol.index].name.clone(),
-                ))?;
+                return Err(anyhow!(
+                    "Token `{}` cannot be inlined",
+                    lexical_grammar.variables[symbol.index].name,
+                ))
            }
            SymbolType::NonTerminal if symbol.index == 0 => {
-                Err(ProcessInlinesError::FirstRule(
-                    grammar.variables[symbol.index].name.clone(),
-                ))?;
+                return Err(anyhow!(
+                    "Rule `{}` cannot be inlined because it is the first rule",
+                    grammar.variables[symbol.index].name,
+                ))
            }
            _ => {}
        }
@ -236,10 +223,10 @@ pub(super) fn process_inlines(
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::{
-        grammars::{LexicalVariable, SyntaxVariable, VariableType},
-        rules::{Associativity, Precedence, Symbol},
+    use crate::generate::grammars::{
+        LexicalVariable, ProductionStep, SyntaxVariable, VariableType,
    };
+    use crate::generate::rules::{Associativity, Precedence, Symbol};

    #[test]
    fn test_basic_inlining() {
@ -377,10 +364,10 @@ mod tests {

        let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();

-        let productions = inline_map
+        let productions: Vec<&Production> = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 1)
            .unwrap()
-            .collect::<Vec<_>>();
+            .collect();

        assert_eq!(
            productions.iter().copied().cloned().collect::<Vec<_>>(),
@ -476,10 +463,10 @@ mod tests {

        let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();

-        let productions = inline_map
+        let productions: Vec<_> = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .unwrap()
-            .collect::<Vec<_>>();
+            .collect();

        assert_eq!(
            productions.iter().copied().cloned().collect::<Vec<_>>(),
@ -549,9 +536,10 @@ mod tests {
            ..Default::default()
        };

-        let result = process_inlines(&grammar, &lexical_grammar);
-        assert!(result.is_err(), "expected an error, but got none");
-        let err = result.err().unwrap();
-        assert_eq!(err.to_string(), "Token `something` cannot be inlined",);
+        if let Err(error) = process_inlines(&grammar, &lexical_grammar) {
+            assert_eq!(error.to_string(), "Token `something` cannot be inlined");
+        } else {
+            panic!("expected an error, but got none");
+        }
    }
 }
--- a/cli/src/generate/prepare_grammar/unicode-categories.json
+++ b/cli/src/generate/prepare_grammar/unicode-categories.json
--- a/cli/src/generate/prepare_grammar/unicode-category-aliases.json
+++ b/cli/src/generate/prepare_grammar/unicode-category-aliases.json
@ -0,0 +1 @@
+{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}
--- a/cli/src/generate/prepare_grammar/unicode-properties.json
+++ b/cli/src/generate/prepare_grammar/unicode-properties.json
--- a/cli/src/generate/prepare_grammar/unicode-property-aliases.json
+++ b/cli/src/generate/prepare_grammar/unicode-property-aliases.json
@ -0,0 +1 @@
+{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","EqUIdeo":"Equivalent_Unified_Ideograph","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion"
,"CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}
--- a/crates/generate/src/render.rs
+++ b/crates/generate/src/render.rs
--- a/crates/generate/src/rules.rs
+++ b/crates/generate/src/rules.rs
@ -1,11 +1,9 @@
-use std::{collections::BTreeMap, fmt};
-
-use serde::Serialize;
-use smallbitvec::SmallBitVec;
-
 use super::grammars::VariableType;
+use smallbitvec::SmallBitVec;
+use std::iter::FromIterator;
+use std::{collections::HashMap, fmt};

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub enum SymbolType {
    External,
    End,
@ -14,19 +12,19 @@ pub enum SymbolType {
    NonTerminal,
 }

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub enum Associativity {
    Left,
    Right,
 }

-#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct Alias {
    pub value: String,
    pub is_named: bool,
 }

-#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
 pub enum Precedence {
    #[default]
    None,
@ -34,50 +32,48 @@ pub enum Precedence {
    Name(String),
 }

-pub type AliasMap = BTreeMap<Symbol, Alias>;
+pub type AliasMap = HashMap<Symbol, Alias>;

-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)]
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
 pub struct MetadataParams {
    pub precedence: Precedence,
    pub dynamic_precedence: i32,
    pub associativity: Option<Associativity>,
    pub is_token: bool,
+    pub is_string: bool,
+    pub is_active: bool,
    pub is_main_token: bool,
    pub alias: Option<Alias>,
    pub field_name: Option<String>,
 }

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct Symbol {
    pub kind: SymbolType,
    pub index: usize,
 }

-#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub enum Rule {
    Blank,
    String(String),
    Pattern(String, String),
    NamedSymbol(String),
    Symbol(Symbol),
-    Choice(Vec<Self>),
+    Choice(Vec<Rule>),
    Metadata {
        params: MetadataParams,
-        rule: Box<Self>,
-    },
-    Repeat(Box<Self>),
-    Seq(Vec<Self>),
-    Reserved {
-        rule: Box<Self>,
-        context_name: String,
+        rule: Box<Rule>,
    },
+    Repeat(Box<Rule>),
+    Seq(Vec<Rule>),
 }

 // Because tokens are represented as small (~400 max) unsigned integers,
 // sets of tokens can be efficiently represented as bit vectors with each
 // index corresponding to a token, and each value representing whether or not
 // the token is present in the set.
-#[derive(Default, Clone, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct TokenSet {
    terminal_bits: SmallBitVec,
    external_bits: SmallBitVec,
@ -85,32 +81,6 @@ pub struct TokenSet {
    end_of_nonterminal_extra: bool,
 }

-impl fmt::Debug for TokenSet {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_list().entries(self.iter()).finish()
-    }
-}
-
-impl PartialOrd for TokenSet {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for TokenSet {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.terminal_bits
-            .iter()
-            .cmp(other.terminal_bits.iter())
-            .then_with(|| self.external_bits.iter().cmp(other.external_bits.iter()))
-            .then_with(|| self.eof.cmp(&other.eof))
-            .then_with(|| {
-                self.end_of_nonterminal_extra
-                    .cmp(&other.end_of_nonterminal_extra)
-            })
-    }
-}
-
 impl Rule {
    pub fn field(name: String, content: Self) -> Self {
        add_metadata(content, move |params| {
@ -175,21 +145,9 @@ impl Rule {
        Self::Choice(elements)
    }

-    pub const fn seq(rules: Vec<Self>) -> Self {
+    pub fn seq(rules: Vec<Self>) -> Self {
        Self::Seq(rules)
    }
-
-    pub fn is_empty(&self) -> bool {
-        match self {
-            Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
-            Self::String(string) => string.is_empty(),
-            Self::Metadata { rule, .. } | Self::Repeat(rule) | Self::Reserved { rule, .. } => {
-                rule.is_empty()
-            }
-            Self::Choice(rules) => rules.iter().any(Self::is_empty),
-            Self::Seq(rules) => rules.iter().all(Self::is_empty),
-        }
-    }
 }

 impl Alias {
@ -306,14 +264,14 @@ impl Symbol {
 }

 impl From<Symbol> for Rule {
+    // Wraps a `Symbol` in the `Rule::Symbol` variant.
    fn from(symbol: Symbol) -> Self {
        Self::Symbol(symbol)
    }
 }

 impl TokenSet {
-    #[must_use]
-    pub const fn new() -> Self {
+    pub fn new() -> Self {
        Self {
            terminal_bits: SmallBitVec::new(),
            external_bits: SmallBitVec::new(),
@ -424,9 +382,6 @@ impl TokenSet {
        };
        if other.index < vec.len() && vec[other.index] {
            vec.set(other.index, false);
-            while vec.last() == Some(false) {
-                vec.pop();
-            }
            return true;
        }
        false
@ -439,13 +394,6 @@ impl TokenSet {
            && !self.external_bits.iter().any(|a| a)
    }

-    pub fn len(&self) -> usize {
-        self.eof as usize
-            + self.end_of_nonterminal_extra as usize
-            + self.terminal_bits.iter().filter(|b| *b).count()
-            + self.external_bits.iter().filter(|b| *b).count()
-    }
-
    pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
        let mut result = false;
        if other.terminal_bits.len() > self.terminal_bits.len() {
--- a/crates/generate/src/tables.rs
+++ b/crates/generate/src/tables.rs
@ -1,9 +1,6 @@
+use super::nfa::CharacterSet;
+use super::rules::{Alias, Symbol, TokenSet};
 use std::collections::BTreeMap;
-
-use super::{
-    nfa::CharacterSet,
-    rules::{Alias, Symbol, TokenSet},
-};
 pub type ProductionInfoId = usize;
 pub type ParseStateId = usize;
 pub type LexStateId = usize;
@ -47,7 +44,6 @@ pub struct ParseState {
    pub id: ParseStateId,
    pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
    pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
-    pub reserved_words: TokenSet,
    pub lex_state_id: usize,
    pub external_lex_state_id: usize,
    pub core_id: usize,
@ -65,7 +61,7 @@ pub struct ProductionInfo {
    pub field_map: BTreeMap<String, Vec<FieldLocation>>,
 }

-#[derive(Debug, Default, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct ParseTable {
    pub states: Vec<ParseState>,
    pub symbols: Vec<Symbol>,
@ -93,7 +89,6 @@ pub struct LexTable {
 }

 impl ParseTableEntry {
-    #[must_use]
    pub const fn new() -> Self {
        Self {
            reusable: true,
--- a/cli/src/generate/templates/binding.cc
+++ b/cli/src/generate/templates/binding.cc
@ -0,0 +1,28 @@
+#include "tree_sitter/parser.h"
+#include <node.h>
+#include "nan.h"
+
+using namespace v8;
+
+extern "C" TSLanguage * tree_sitter_PARSER_NAME();
+
+namespace {
+
+// Constructor callback backing the `Language` function template; its body is empty.
+NAN_METHOD(New) {}
+
+// Module initializer: creates one `Language` object, stores the pointer
+// returned by tree_sitter_PARSER_NAME() in its internal field 0, sets its
+// `name` property, and installs the object as `module.exports`.
+void Init(Local<Object> exports, Local<Object> module) {
+  Local<FunctionTemplate> language_template = Nan::New<FunctionTemplate>(New);
+  language_template->SetClassName(Nan::New("Language").ToLocalChecked());
+  // Reserve the single internal field that is filled in below.
+  language_template->InstanceTemplate()->SetInternalFieldCount(1);
+
+  Local<Function> language_ctor = Nan::GetFunction(language_template).ToLocalChecked();
+  Local<Object> language =
+      language_ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
+  Nan::SetInternalFieldPointer(language, 0, tree_sitter_PARSER_NAME());
+
+  Nan::Set(language,
+           Nan::New("name").ToLocalChecked(),
+           Nan::New("PARSER_NAME").ToLocalChecked());
+  Nan::Set(module, Nan::New("exports").ToLocalChecked(), language);
+}
+
+NODE_MODULE(tree_sitter_PARSER_NAME_binding, Init)
+
+}  // namespace
--- a/cli/src/generate/templates/binding.gyp
+++ b/cli/src/generate/templates/binding.gyp
@ -0,0 +1,19 @@
+# node-gyp build description for the Node.js binding of the generated parser.
+{
+  "targets": [
+    {
+      # Must stay in sync with the name passed to NODE_MODULE in binding.cc
+      # and with the file name required by index.js.
+      "target_name": "tree_sitter_PARSER_NAME_binding",
+      "include_dirs": [
+        # `<!(...)` is gyp command expansion: the command's output is used as
+        # the value. Requiring `nan` prints the directory holding its headers.
+        "<!(node -e \"require('nan')\")",
+        "src"
+      ],
+      "sources": [
+        "bindings/node/binding.cc",
+        "src/parser.c",
+        # If your language uses an external scanner, add it here.
+      ],
+      # Flags applied to C sources only (the parser), not to the C++ binding.
+      "cflags_c": [
+        "-std=c99",
+      ]
+    }
+  ]
+}
--- a/cli/src/generate/templates/build.rs
+++ b/cli/src/generate/templates/build.rs
@ -0,0 +1,40 @@
+// Build script for the generated grammar crate.
+//
+// Compiles `src/parser.c` into a static library named "parser" using the `cc`
+// crate and tells Cargo to re-run the script whenever that file changes.
+// Optional, commented-out sections show how to add an external scanner
+// written in C or in C++.
+fn main() {
+    let src_dir = std::path::Path::new("src");
+
+    let mut c_config = cc::Build::new();
+    // `src_dir` is already a `&Path`; pass it directly instead of `&&Path`.
+    c_config.include(src_dir);
+    // Silence warnings that generated parser code commonly triggers; each flag
+    // is only added when the active compiler accepts it.
+    c_config
+        .flag_if_supported("-Wno-unused-parameter")
+        .flag_if_supported("-Wno-unused-but-set-variable")
+        .flag_if_supported("-Wno-trigraphs");
+    let parser_path = src_dir.join("parser.c");
+    c_config.file(&parser_path);
+
+    // If your language uses an external scanner written in C,
+    // then include this block of code:
+
+    /*
+    let scanner_path = src_dir.join("scanner.c");
+    c_config.file(&scanner_path);
+    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
+    */
+
+    c_config.compile("parser");
+    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
+
+    // If your language uses an external scanner written in C++,
+    // then include this block of code:
+
+    /*
+    let mut cpp_config = cc::Build::new();
+    cpp_config.cpp(true);
+    cpp_config.include(src_dir);
+    cpp_config
+        .flag_if_supported("-Wno-unused-parameter")
+        .flag_if_supported("-Wno-unused-but-set-variable");
+    let scanner_path = src_dir.join("scanner.cc");
+    cpp_config.file(&scanner_path);
+    cpp_config.compile("scanner");
+    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
+    */
+}
--- a/cli/src/generate/templates/cargo.toml
+++ b/cli/src/generate/templates/cargo.toml
@ -0,0 +1,26 @@
+# Cargo manifest template for the Rust binding of a generated grammar.
+# NOTE(review): the upper-case tokens in the values below look like placeholders
+# filled in by the generator -- confirm against the code that renders this template.
+[package]
+name = "tree-sitter-PARSER_NAME"
+description = "PARSER_NAME grammar for the tree-sitter parsing library"
+version = "0.0.1"
+keywords = ["incremental", "parsing", "PARSER_NAME"]
+categories = ["parsing", "text-editors"]
+repository = "https://github.com/tree-sitter/tree-sitter-PARSER_NAME"
+edition = "2018"
+license = "MIT"
+
+# The build script lives with the Rust binding rather than at the crate root.
+build = "bindings/rust/build.rs"
+# Files packaged with the crate: the binding itself, the grammar definition,
+# the queries, and the generated sources that the build script compiles.
+include = [
+  "bindings/rust/*",
+  "grammar.js",
+  "queries/*",
+  "src/*",
+]
+
+[lib]
+path = "bindings/rust/lib.rs"
+
+[dependencies]
+# Tilde requirement: allows patch-level updates of the stated version only.
+tree-sitter = "~RUST_BINDING_VERSION"
+
+[build-dependencies]
+# Used by the build script to compile the C parser.
+cc = "1.0"
--- a/cli/src/generate/templates/index.js
+++ b/cli/src/generate/templates/index.js
@ -0,0 +1,19 @@
+try {
+  module.exports = require("../../build/Release/tree_sitter_PARSER_NAME_binding");
+} catch (error1) {
+  if (error1.code !== 'MODULE_NOT_FOUND') {
+    throw error1;
+  }
+  try {
+    module.exports = require("../../build/Debug/tree_sitter_PARSER_NAME_binding");
+  } catch (error2) {
+    if (error2.code !== 'MODULE_NOT_FOUND') {
+      throw error2;
+    }
+    throw error1
+  }
+}
+
+try {
+  module.exports.nodeTypeInfo = require("../../src/node-types.json");
+} catch (_) {}
--- a/Show more
+++ b/Show more
Author	SHA1	Message	Date
Amaan Qureshi	2c192fa038	feat!: introduce parser introspection via the repo's Semantic Version `baked in`	2024-02-12 02:27:17 -05:00
Amaan Qureshi	b66b1a7a92	refactor!: expose the allocator family of functions for consumption in scanners	2024-02-12 01:02:41 -05:00
				`@ -1 +0,0 @@`
				`See [docs/src/6-contributing.md](./docs/src/6-contributing.md)`
				`@ -0,0 +1 @@`
				{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}
				`@ -0,0 +1 @@`
				{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","EqUIdeo":"Equivalent_Unified_Ideograph","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusi
on","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}