Merge pull request #260 from tree-sitter/rust-cli
Include CLI functionality in the main repo, using Rust instead of C++
This commit is contained in:
commit
49392c8502
287 changed files with 16654 additions and 18526 deletions
|
|
@ -1,26 +1,50 @@
|
|||
image: Visual Studio 2017
|
||||
build: false
|
||||
install:
|
||||
# Terminate early unless building either a tag or a PR.
|
||||
- if "%APPVEYOR_REPO_TAG%" == "false" if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
# Install rust
|
||||
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
|
||||
- IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc
|
||||
- IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc
|
||||
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
|
||||
- rustc -vV
|
||||
- cargo -vV
|
||||
|
||||
# Install dependencies
|
||||
- git submodule update --init
|
||||
|
||||
platform:
|
||||
- x86
|
||||
- x64
|
||||
|
||||
init:
|
||||
- git config --global core.autocrlf false
|
||||
|
||||
install:
|
||||
- IF "%PLATFORM%" == "x86" (call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars32.bat")
|
||||
- IF "%PLATFORM%" == "x64" (call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat")
|
||||
- script\configure.cmd
|
||||
- script\fetch-fixtures.cmd
|
||||
- x86
|
||||
|
||||
test_script:
|
||||
- script\test.cmd
|
||||
# Fetch and regenerate the fixture parsers
|
||||
- script\fetch-fixtures.cmd
|
||||
- cargo build --release
|
||||
- script\regenerate-fixtures.cmd
|
||||
|
||||
build: off
|
||||
# Run tests
|
||||
- script\test.cmd
|
||||
- script\benchmark.cmd
|
||||
|
||||
before_deploy:
|
||||
- move target\release\tree-sitter.exe tree-sitter.exe
|
||||
- 7z a -tgzip tree-sitter-windows-%PLATFORM%.gz tree-sitter.exe
|
||||
- appveyor PushArtifact tree-sitter-windows-%PLATFORM%.gz
|
||||
|
||||
deploy:
|
||||
description: ''
|
||||
provider: GitHub
|
||||
auth_token:
|
||||
secure: VC9ntV5+inKoNteZyLQksKzWMKXF46P+Jx3JHKVSfF+o1rWtZn2iIHAVsQv5LaUi
|
||||
artifact: /tree-sitter-windows-.*/
|
||||
draft: true
|
||||
force_update: true
|
||||
on:
|
||||
APPVEYOR_REPO_TAG: true
|
||||
|
||||
cache:
|
||||
- target
|
||||
- test\fixtures\grammars
|
||||
- C:\Users\appveyor\.cargo
|
||||
|
|
|
|||
|
|
@ -1,65 +0,0 @@
|
|||
---
|
||||
Language: Cpp
|
||||
AccessModifierOffset: -1
|
||||
AlignAfterOpenBracket: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignEscapedNewlinesLeft: true
|
||||
AlignOperands: true
|
||||
AlignTrailingComments: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: Empty
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
AlwaysBreakTemplateDeclarations: true
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeBraces: Attach
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
ColumnLimit: 80
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 2
|
||||
Cpp11BracedListStyle: false
|
||||
DerivePointerAlignment: true
|
||||
DisableFormat: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
IndentCaseLabels: true
|
||||
IndentWidth: 2
|
||||
IndentWrappedFunctionNames: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 60
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyExcessCharacter: 20
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PointerAlignment: Left
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Auto
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
...
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
-std=c++14
|
||||
-Isrc
|
||||
-Itest
|
||||
-Iinclude
|
||||
-Iexternals/utf8proc
|
||||
-Iexternals/json-parser
|
||||
-Iexternals/bandit
|
||||
-Iexternals/crypto-algorithms
|
||||
27
.gitignore
vendored
27
.gitignore
vendored
|
|
@ -1,22 +1,17 @@
|
|||
# Compiled binaries
|
||||
out
|
||||
*.a
|
||||
*.o
|
||||
log*.html
|
||||
|
||||
fuzz-results
|
||||
log.html
|
||||
|
||||
# Generated build config files
|
||||
gyp-mac-tool
|
||||
Makefile
|
||||
*.Makefile
|
||||
*.target.mk
|
||||
|
||||
# IDE files
|
||||
.idea
|
||||
*.xcodeproj
|
||||
|
||||
# Dev dependencies
|
||||
fuzz-results
|
||||
|
||||
test/fixtures/grammars/*
|
||||
!test/fixtures/grammars/.gitkeep
|
||||
externals/cpplint.py
|
||||
|
||||
/target
|
||||
*.rs.bk
|
||||
*.a
|
||||
*.o
|
||||
*.obj
|
||||
*.exp
|
||||
*.lib
|
||||
|
|
|
|||
14
.gitmodules
vendored
14
.gitmodules
vendored
|
|
@ -1,15 +1,3 @@
|
|||
[submodule "externals/bandit"]
|
||||
path = externals/bandit
|
||||
url = https://github.com/joakimkarlsson/bandit.git
|
||||
[submodule "externals/gyp"]
|
||||
path = externals/gyp
|
||||
url = https://github.com/svn2github/gyp.git
|
||||
[submodule "externals/utf8proc"]
|
||||
path = externals/utf8proc
|
||||
path = lib/utf8proc
|
||||
url = https://github.com/julialang/utf8proc
|
||||
[submodule "externals/json-parser"]
|
||||
path = externals/json-parser
|
||||
url = https://github.com/udp/json-parser.git
|
||||
[submodule "externals/crypto-algorithms"]
|
||||
path = externals/crypto-algorithms
|
||||
url = https://github.com/maxbrunsfeld/crypto-algorithms.git
|
||||
|
|
|
|||
56
.travis.yml
56
.travis.yml
|
|
@ -1,28 +1,44 @@
|
|||
sudo: false
|
||||
dist: trusty
|
||||
language: cpp
|
||||
compiler:
|
||||
- gcc
|
||||
language: rust
|
||||
rust:
|
||||
- stable
|
||||
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-5
|
||||
- clang
|
||||
|
||||
install:
|
||||
- export CXX="g++-5"
|
||||
- scan-build script/configure
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
script:
|
||||
- script/ci
|
||||
# Fetch and regenerate the fixture parsers
|
||||
- script/fetch-fixtures
|
||||
- cargo build --release
|
||||
- script/regenerate-fixtures
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- test/fixtures/grammars
|
||||
# Run tests
|
||||
- export TREE_SITTER_STATIC_ANALYSIS=1
|
||||
- script/test
|
||||
- script/benchmark
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- /\d+\.\d+\.\d+/
|
||||
|
||||
before_deploy:
|
||||
- cp target/release/tree-sitter .
|
||||
- gzip --suffix "-${TRAVIS_OS_NAME}-x64.gz" tree-sitter
|
||||
|
||||
deploy:
|
||||
provider: releases
|
||||
api_key:
|
||||
secure: "cAd2mQP+Q55v3zedo5ZyOVc3hq3XKMW93lp5LuXV6CYKYbIhkyfym4qfs+C9GJQiIP27cnePYM7B3+OMIFwSPIgXHWWSsuloMtDgYSc/PAwb2dZnJqAyog3BohW/QiGTSnvbVlxPF6P9RMQU6+JP0HJzEJy6QBTa4Und/j0jm24="
|
||||
file_glob: true
|
||||
file: "tree-sitter-*.gz"
|
||||
draft: true
|
||||
overwrite: true
|
||||
skip_cleanup: true
|
||||
on:
|
||||
tags: true
|
||||
|
||||
cache:
|
||||
cargo: true
|
||||
directories:
|
||||
- test/fixtures/grammars
|
||||
|
|
|
|||
727
Cargo.lock
generated
Normal file
727
Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,727 @@
|
|||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.6.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "argon2rs"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace-sys"
|
||||
version = "0.1.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "blake2-rfc"
|
||||
version = "0.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.32.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cloudabi"
|
||||
version = "0.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "difference"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "failure"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "failure_derive"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon-sys"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodrop"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "4.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-rational"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "0.4.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "0.6.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_isaac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_os"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_pcg"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rdrand"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "redox_termios"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rsass"
|
||||
version = "0.9.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nom 4.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "scoped_threadpool"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver-parser"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallbitvec"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.15.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "synstructure"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termion"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-cli"
|
||||
version = "0.14.0-beta4"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tree-sitter 0.3.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ucd-util"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e"
|
||||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
"checksum argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3f67b0b6a86dae6e67ff4ca2b6201396074996379fba2b92ff649126f37cb392"
|
||||
"checksum arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f405cc4c21cd8b784f6c8fc2adf9bc00f59558f0049b5ec21517f875963040cc"
|
||||
"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
|
||||
"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727"
|
||||
"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a"
|
||||
"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
|
||||
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
|
||||
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
|
||||
"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d"
|
||||
"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
|
||||
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
|
||||
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
|
||||
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
|
||||
"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e"
|
||||
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
|
||||
"checksum dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88972de891f6118092b643d85a0b28e0678e0f948d7f879aa32f2d5aafe97d2a"
|
||||
"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7"
|
||||
"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596"
|
||||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
"checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e"
|
||||
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
|
||||
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
|
||||
"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
|
||||
"checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311"
|
||||
"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2"
|
||||
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
|
||||
"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16"
|
||||
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
|
||||
"checksum nom 4.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9c349f68f25f596b9f44cf0e7c69752a5c633b0550c3ff849518bfba0233774a"
|
||||
"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea"
|
||||
"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10"
|
||||
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
|
||||
"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09"
|
||||
"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c"
|
||||
"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd"
|
||||
"checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5"
|
||||
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
|
||||
"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db"
|
||||
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
|
||||
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
|
||||
"checksum rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca"
|
||||
"checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05"
|
||||
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
|
||||
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
|
||||
"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d"
|
||||
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
|
||||
"checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26"
|
||||
"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f"
|
||||
"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1"
|
||||
"checksum rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7a5dde55023a6c19470f7aeb59f75f897d8b80cbe00d61dfcaf7bbbe3de4c0a6"
|
||||
"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395"
|
||||
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||
"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7"
|
||||
"checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
|
||||
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
||||
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
|
||||
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef"
|
||||
"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c"
|
||||
"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811"
|
||||
"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e"
|
||||
"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55"
|
||||
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
|
||||
"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7"
|
||||
"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015"
|
||||
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
|
||||
"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6"
|
||||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
|
||||
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
|
||||
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
|
||||
"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
|
||||
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
|
||||
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
6
Cargo.toml
Normal file
6
Cargo.toml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
[workspace]
|
||||
|
||||
members = [
|
||||
"cli",
|
||||
"lib",
|
||||
]
|
||||
22
LICENSE
22
LICENSE
|
|
@ -1,7 +1,21 @@
|
|||
Copyright 2014 Max Brunsfeld
|
||||
The MIT License (MIT)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
Copyright (c) 2018 Max Brunsfeld
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
|
|||
44
cli/Cargo.toml
Normal file
44
cli/Cargo.toml
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
[package]
|
||||
name = "tree-sitter-cli"
|
||||
version = "0.14.0-beta4"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[[bin]]
|
||||
name = "tree-sitter"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bench]]
|
||||
name = "benchmark"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
cc = "1.0"
|
||||
ansi_term = "0.11"
|
||||
difference = "2.0"
|
||||
lazy_static = "1.2.0"
|
||||
smallbitvec = "2.3.0"
|
||||
clap = "2.32"
|
||||
dirs = "1.0.2"
|
||||
hashbrown = "0.1"
|
||||
libloading = "0.5"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
regex-syntax = "0.6.4"
|
||||
regex = "1"
|
||||
rsass = "0.9"
|
||||
|
||||
[dependencies.tree-sitter]
|
||||
path = "../lib"
|
||||
|
||||
[dependencies.serde_json]
|
||||
version = "1.0"
|
||||
features = ["preserve_order"]
|
||||
|
||||
[dependencies.log]
|
||||
version = "0.4.6"
|
||||
features = ["std"]
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.6.4"
|
||||
spin = "0.5"
|
||||
172
cli/benches/benchmark.rs
Normal file
172
cli/benches/benchmark.rs
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
use lazy_static::lazy_static;
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
use std::{env, fs, usize};
|
||||
use tree_sitter::{Language, Parser};
|
||||
use tree_sitter_cli::loader::Loader;
|
||||
|
||||
include!("../src/tests/helpers/dirs.rs");
|
||||
|
||||
lazy_static! {
|
||||
static ref LANGUAGE_FILTER: Option<String> =
|
||||
env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok();
|
||||
static ref EXAMPLE_FILTER: Option<String> =
|
||||
env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok();
|
||||
static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
|
||||
static ref EXAMPLE_PATHS_BY_LANGUAGE_NAME: BTreeMap<String, Vec<PathBuf>> = {
|
||||
let mut result = BTreeMap::new();
|
||||
let grammar_dirs = fs::read_dir(&(*GRAMMARS_DIR)).unwrap();
|
||||
for grammar_dir in grammar_dirs {
|
||||
let grammar_dir = grammar_dir.unwrap();
|
||||
if !grammar_dir.path().is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let language_name = grammar_dir.file_name();
|
||||
let language_name = language_name.to_str().unwrap();
|
||||
if let Ok(example_files) = fs::read_dir(&grammar_dir.path().join("examples")) {
|
||||
result.insert(
|
||||
language_name.to_string(),
|
||||
example_files
|
||||
.filter_map(|p| {
|
||||
let p = p.unwrap().path();
|
||||
if p.is_file() {
|
||||
Some(p)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
);
|
||||
} else {
|
||||
result.insert(language_name.to_string(), Vec::new());
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
};
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let mut parser = Parser::new();
|
||||
let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_NAME
|
||||
.iter()
|
||||
.flat_map(|(_, paths)| paths.iter())
|
||||
.map(|p| p.file_name().unwrap().to_str().unwrap().chars().count())
|
||||
.max()
|
||||
.unwrap();
|
||||
|
||||
let mut all_normal_speeds = Vec::new();
|
||||
let mut all_error_speeds = Vec::new();
|
||||
|
||||
for (language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() {
|
||||
// TODO - remove after fixing slow error parsing HTML.
|
||||
if language_name == "html" {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
|
||||
if language_name != filter.as_str() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("\nLanguage: {}", language_name);
|
||||
parser.set_language(get_language(language_name)).unwrap();
|
||||
|
||||
eprintln!(" Normal examples:");
|
||||
let mut normal_speeds = Vec::new();
|
||||
for example_path in example_paths {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
if !example_path.to_str().unwrap().contains(filter.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
normal_speeds.push(parse(&mut parser, example_path, max_path_length));
|
||||
}
|
||||
|
||||
eprintln!(" Error examples (mismatched languages):");
|
||||
let mut error_speeds = Vec::new();
|
||||
for (other_language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() {
|
||||
if other_language_name != language_name {
|
||||
for example_path in example_paths {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
if !example_path.to_str().unwrap().contains(filter.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
error_speeds.push(parse(&mut parser, example_path, max_path_length));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) {
|
||||
eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
|
||||
eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
|
||||
}
|
||||
|
||||
if let Some((average_error, worst_error)) = aggregate(&error_speeds) {
|
||||
eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
|
||||
eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
|
||||
}
|
||||
|
||||
all_normal_speeds.extend(normal_speeds);
|
||||
all_error_speeds.extend(error_speeds);
|
||||
}
|
||||
|
||||
eprintln!("\nOverall");
|
||||
if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) {
|
||||
eprintln!(" Average Speed (normal): {} bytes/ms", average_normal);
|
||||
eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal);
|
||||
}
|
||||
|
||||
if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) {
|
||||
eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
|
||||
eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
|
||||
}
|
||||
eprintln!("");
|
||||
}
|
||||
|
||||
fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> {
|
||||
if speeds.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let mut total = 0;
|
||||
let mut max = usize::MAX;
|
||||
for speed in speeds.iter().cloned() {
|
||||
total += speed;
|
||||
if speed < max {
|
||||
max = speed;
|
||||
}
|
||||
}
|
||||
Some((total / speeds.len(), max))
|
||||
}
|
||||
|
||||
fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize {
|
||||
eprint!(
|
||||
" {:width$}\t",
|
||||
example_path.file_name().unwrap().to_str().unwrap(),
|
||||
width = max_path_length
|
||||
);
|
||||
|
||||
let source_code = fs::read(example_path).unwrap();
|
||||
let time = Instant::now();
|
||||
let _tree = parser
|
||||
.parse_utf8(&mut |byte, _| &source_code[byte..], None)
|
||||
.expect("Incompatible language version");
|
||||
let duration = time.elapsed();
|
||||
let duration_ms =
|
||||
duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0;
|
||||
let speed = (source_code.len() as f64 / duration_ms) as usize;
|
||||
eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed);
|
||||
speed
|
||||
}
|
||||
|
||||
fn get_language(name: &str) -> Language {
|
||||
TEST_LOADER
|
||||
.load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR)
|
||||
.unwrap()
|
||||
}
|
||||
32
cli/build.rs
Normal file
32
cli/build.rs
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
use std::{env, fs, io};
|
||||
|
||||
fn main() {
|
||||
let git_sha = read_git_sha().unwrap();
|
||||
println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
|
||||
|
||||
println!(
|
||||
"cargo:rustc-env=BUILD_TARGET={}",
|
||||
std::env::var("TARGET").unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
fn read_git_sha() -> io::Result<String> {
|
||||
let git_path = env::current_dir().unwrap().parent().unwrap().join(".git");
|
||||
let git_head_path = git_path.join("HEAD");
|
||||
println!("cargo:rerun-if-changed={}", git_head_path.to_str().unwrap());
|
||||
let mut head_content = fs::read_to_string(&git_head_path)?;
|
||||
assert!(head_content.ends_with("\n"));
|
||||
head_content.pop();
|
||||
|
||||
if head_content.starts_with("ref: ") {
|
||||
// We're on a branch. Read the SHA from the ref file.
|
||||
head_content.replace_range(0.."ref: ".len(), "");
|
||||
let ref_filename = git_path.join(&head_content);
|
||||
println!("cargo:rerun-if-changed={}", ref_filename.to_str().unwrap());
|
||||
fs::read_to_string(&ref_filename)
|
||||
} else {
|
||||
// We're not on a branch. The `HEAD` file itself contains the sha.
|
||||
assert_eq!(head_content.len(), 40);
|
||||
Ok(head_content)
|
||||
}
|
||||
}
|
||||
4
cli/npm/.gitignore
vendored
Normal file
4
cli/npm/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
tree-sitter
|
||||
tree-sitter.exe
|
||||
*.gz
|
||||
*.tgz
|
||||
12
cli/npm/cli.js
Executable file
12
cli/npm/cli.js
Executable file
|
|
@ -0,0 +1,12 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
const path = require('path');
|
||||
const spawn = require("child_process").spawn;
|
||||
const executable = process.platform === 'win32'
|
||||
? 'tree-sitter.exe'
|
||||
: 'tree-sitter';
|
||||
spawn(
|
||||
path.join(__dirname, executable),
|
||||
process.argv.slice(2),
|
||||
{stdio: 'inherit'}
|
||||
).on('close', process.exit)
|
||||
67
cli/npm/install.js
Executable file
67
cli/npm/install.js
Executable file
|
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const zlib = require('zlib');
|
||||
const https = require('https');
|
||||
const packageJSON = require('./package.json');
|
||||
|
||||
// Determine the URL of the file.
|
||||
const platformName = {
|
||||
'darwin': 'osx',
|
||||
'linux': 'linux',
|
||||
'win32': 'windows'
|
||||
}[process.platform];
|
||||
if (!platformName) {
|
||||
throw new Error(`Cannot install tree-sitter-cli for platform ${process.platform}`);
|
||||
}
|
||||
|
||||
const archName = {
|
||||
'x64': 'x64',
|
||||
'x86': 'x86',
|
||||
'ia32': 'x86'
|
||||
}[process.arch];
|
||||
if (!archName) {
|
||||
throw new Error(`Cannot install tree-sitter-cli for architecture ${process.arch}`);
|
||||
}
|
||||
|
||||
const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/${packageJSON.version}`;
|
||||
const assetName = `tree-sitter-${platformName}-${archName}.gz`;
|
||||
const assetURL = `${releaseURL}/${assetName}`;
|
||||
|
||||
// Remove previously-downloaded files.
|
||||
const executableName = process.platform === 'win32' ? 'tree-sitter.exe' : 'tree-sitter';
|
||||
if (fs.existsSync(executableName)) {
|
||||
fs.unlinkSync(executableName);
|
||||
}
|
||||
|
||||
// Download the compressed file.
|
||||
console.log(`Downloading ${assetURL}`);
|
||||
const file = fs.createWriteStream(executableName);
|
||||
get(assetURL, response => {
|
||||
if (response.statusCode > 299) {
|
||||
throw new Error([
|
||||
'Download failed',
|
||||
'',
|
||||
`url: ${assetURL}`,
|
||||
`status: ${response.statusCode}`,
|
||||
`headers: ${JSON.stringify(response.headers, null, 2)}`,
|
||||
'',
|
||||
].join('\n'));
|
||||
}
|
||||
response.pipe(zlib.createGunzip()).pipe(file);
|
||||
});
|
||||
|
||||
file.on('finish', () => {
|
||||
fs.chmodSync(executableName, '755');
|
||||
});
|
||||
|
||||
// Follow redirects.
|
||||
function get(url, callback) {
|
||||
https.get(url, response => {
|
||||
if (response.statusCode === 301 || response.statusCode === 302) {
|
||||
get(response.headers.location, callback);
|
||||
} else {
|
||||
callback(response);
|
||||
}
|
||||
});
|
||||
}
|
||||
5
cli/npm/package-lock.json
generated
Normal file
5
cli/npm/package-lock.json
generated
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.14.0-beta4",
|
||||
"lockfileVersion": 1
|
||||
}
|
||||
22
cli/npm/package.json
Normal file
22
cli/npm/package.json
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.14.0-beta4",
|
||||
"author": "Max Brunsfeld",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "http://github.com/tree-sitter/tree-sitter.git"
|
||||
},
|
||||
"description": "CLI for generating fast incremental parsers",
|
||||
"keywords": [
|
||||
"parser",
|
||||
"lexer"
|
||||
],
|
||||
"main": "lib/api/index.js",
|
||||
"scripts": {
|
||||
"install": "node install.js"
|
||||
},
|
||||
"bin": {
|
||||
"tree-sitter": "cli.js"
|
||||
}
|
||||
}
|
||||
44
cli/src/error.rs
Normal file
44
cli/src/error.rs
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
use std::io;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error(pub String);
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl Error {
|
||||
pub fn grammar(message: &str) -> Self {
|
||||
Error(format!("Grammar error: {}", message))
|
||||
}
|
||||
|
||||
pub fn regex(message: &str) -> Self {
|
||||
Error(format!("Regex error: {}", message))
|
||||
}
|
||||
|
||||
pub fn undefined_symbol(name: &str) -> Self {
|
||||
Error(format!("Undefined symbol `{}`", name))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(error: serde_json::Error) -> Self {
|
||||
Error(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(error: io::Error) -> Self {
|
||||
Error(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rsass::Error> for Error {
|
||||
fn from(error: rsass::Error) -> Self {
|
||||
Error(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Error {
|
||||
fn from(error: String) -> Self {
|
||||
Error(error)
|
||||
}
|
||||
}
|
||||
333
cli/src/generate/build_tables/build_lex_table.rs
Normal file
333
cli/src/generate/build_tables/build_lex_table.rs
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
use super::coincident_tokens::CoincidentTokenIndex;
|
||||
use super::item::TokenSet;
|
||||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::rules::Symbol;
|
||||
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
|
||||
use log::info;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap, VecDeque};
|
||||
|
||||
/// Build the lexer tables for a grammar.
///
/// Returns a pair of `(main_lex_table, keyword_lex_table)`:
/// * the main table handles every token that can appear in some parse state;
/// * the keyword table is built only when the grammar declares a `word_token`,
///   and contains a single entry-point state for the keyword tokens.
///
/// Parse states whose token sets can be merged without lexical conflicts
/// share a single lex state; each parse state's `lex_state_id` is updated
/// in place. When `minimize` is true, identical lex states are deduplicated
/// afterwards via `minimize_lex_table`.
pub(crate) fn build_lex_table(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    keywords: &TokenSet,
    coincident_token_index: &CoincidentTokenIndex,
    token_conflict_map: &TokenConflictMap,
    minimize: bool,
) -> (LexTable, LexTable) {
    // Build a separate, small lex table for keywords when keyword extraction
    // is in use; otherwise leave it empty.
    let keyword_lex_table;
    if syntax_grammar.word_token.is_some() {
        let mut builder = LexTableBuilder::new(lexical_grammar);
        builder.add_state_for_tokens(keywords);
        keyword_lex_table = builder.table;
    } else {
        keyword_lex_table = LexTable::default();
    }

    // Group parse states by (merged) token set. Each group will share one
    // lex entry-point state.
    let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec<ParseStateId>)> = Vec::new();
    for (i, state) in parse_table.states.iter().enumerate() {
        // Collect the tokens valid in this parse state. Keywords are replaced
        // by the grammar's word token (they are recognized by the keyword
        // table instead); EOF is kept; external tokens are dropped.
        let tokens = state
            .terminal_entries
            .keys()
            .filter_map(|token| {
                if token.is_terminal() {
                    if keywords.contains(&token) {
                        syntax_grammar.word_token
                    } else {
                        Some(*token)
                    }
                } else if token.is_eof() {
                    Some(*token)
                } else {
                    None
                }
            })
            .collect();

        // Try to merge this state's token set into an existing group; merging
        // only succeeds when it introduces no lexical conflicts.
        let mut did_merge = false;
        for entry in parse_state_ids_by_token_set.iter_mut() {
            if merge_token_set(
                &mut entry.0,
                &tokens,
                lexical_grammar,
                token_conflict_map,
                coincident_token_index,
            ) {
                did_merge = true;
                entry.1.push(i);
                break;
            }
        }

        if !did_merge {
            // No compatible group: start a new one for this token set.
            parse_state_ids_by_token_set.push((tokens, vec![i]));
        }
    }

    // Build one lex entry-point state per token-set group, and point every
    // parse state in the group at it.
    let mut builder = LexTableBuilder::new(lexical_grammar);
    for (tokens, parse_state_ids) in parse_state_ids_by_token_set {
        let lex_state_id = builder.add_state_for_tokens(&tokens);
        for id in parse_state_ids {
            parse_table.states[id].lex_state_id = lex_state_id;
        }
    }

    let mut table = builder.table;

    if minimize {
        minimize_lex_table(&mut table, parse_table);
    }

    (table, keyword_lex_table)
}
|
||||
|
||||
/// A lex state whose outgoing transitions have not yet been computed.
/// Queued by `LexTableBuilder::add_state` and drained in
/// `LexTableBuilder::add_state_for_tokens`.
struct QueueEntry {
    // Index of the (currently empty) state in the lex table.
    state_id: usize,
    // The set of NFA states this lex state represents.
    nfa_states: Vec<u32>,
    // Whether EOF is a valid lookahead in this state.
    eof_valid: bool,
}
|
||||
|
||||
/// Incrementally constructs a `LexTable` from the lexical grammar's NFA
/// using a subset-construction style algorithm: each lex state corresponds
/// to a set of NFA states (plus an EOF-validity flag), and states are
/// interned so equivalent sets are built only once.
struct LexTableBuilder<'a> {
    lexical_grammar: &'a LexicalGrammar,
    // Cursor used to walk the grammar's NFA when computing completions
    // and transitions for each state.
    cursor: NfaCursor<'a>,
    // The table being built.
    table: LexTable,
    // States created but not yet populated with transitions.
    state_queue: VecDeque<QueueEntry>,
    // Interning map: (sorted NFA state set, eof_valid) -> lex state id.
    state_ids_by_nfa_state_set: HashMap<(Vec<u32>, bool), usize>,
}
|
||||
|
||||
impl<'a> LexTableBuilder<'a> {
    /// Create a builder with an empty table and an NFA cursor over the
    /// lexical grammar's NFA.
    fn new(lexical_grammar: &'a LexicalGrammar) -> Self {
        Self {
            lexical_grammar,
            cursor: NfaCursor::new(&lexical_grammar.nfa, vec![]),
            table: LexTable::default(),
            state_queue: VecDeque::new(),
            state_ids_by_nfa_state_set: HashMap::new(),
        }
    }

    /// Create (or reuse) an entry-point lex state for the given set of valid
    /// tokens, then fully populate every state reachable from it by draining
    /// the work queue. Returns the entry-point state's id.
    fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize {
        let mut eof_valid = false;
        // Start from the NFA start state of each terminal token. A non-terminal
        // entry in the set marks EOF as a valid lookahead instead.
        let nfa_states = tokens
            .iter()
            .filter_map(|token| {
                if token.is_terminal() {
                    Some(self.lexical_grammar.variables[token.index].start_state)
                } else {
                    eof_valid = true;
                    None
                }
            })
            .collect();
        let (state_id, is_new) = self.add_state(nfa_states, eof_valid);

        if is_new {
            info!(
                "entry point state: {}, tokens: {:?}",
                state_id,
                tokens
                    .iter()
                    .map(|t| &self.lexical_grammar.variables[t.index].name)
                    .collect::<Vec<_>>()
            );
        }

        // Populate all queued states (the entry point plus any successors
        // discovered while populating). Note: the loop's `state_id` binding
        // shadows the entry point's id; the outer one is returned below.
        while let Some(QueueEntry {
            state_id,
            nfa_states,
            eof_valid,
        }) = self.state_queue.pop_front()
        {
            self.populate_state(state_id, nfa_states, eof_valid);
        }
        state_id
    }

    /// Intern a lex state for `(nfa_states, eof_valid)`. Returns the state id
    /// and whether the state is newly created. New states are pushed onto the
    /// queue to be populated later; the interning key uses the cursor's
    /// normalized `state_ids` rather than the raw input vector.
    fn add_state(&mut self, nfa_states: Vec<u32>, eof_valid: bool) -> (usize, bool) {
        self.cursor.reset(nfa_states);
        match self
            .state_ids_by_nfa_state_set
            .entry((self.cursor.state_ids.clone(), eof_valid))
        {
            Entry::Occupied(o) => (*o.get(), false),
            Entry::Vacant(v) => {
                let state_id = self.table.states.len();
                self.table.states.push(LexState::default());
                self.state_queue.push_back(QueueEntry {
                    state_id,
                    nfa_states: v.key().0.clone(),
                    eof_valid,
                });
                v.insert(state_id);
                (state_id, true)
            }
        }
    }

    /// Fill in a previously-created lex state: its advance actions (one per
    /// NFA character-set transition, plus an EOF transition when applicable)
    /// and its accept action.
    fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>, eof_valid: bool) {
        self.cursor.force_reset(nfa_states);

        // The EOF state is represented as an empty list of NFA states.
        // Pick the single best completed token in this state, using
        // `prefer_token` to break ties between completions.
        let mut completion = None;
        for (id, prec) in self.cursor.completions() {
            if let Some((prev_id, prev_precedence)) = completion {
                if TokenConflictMap::prefer_token(
                    self.lexical_grammar,
                    (prev_precedence, prev_id),
                    (prec, id),
                ) {
                    continue;
                }
            }
            completion = Some((id, prec));
        }

        let transitions = self.cursor.transitions();
        let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);

        // If EOF is a valid lookahead token, add a transition predicated on the null
        // character that leads to the empty set of NFA states.
        if eof_valid {
            let (next_state_id, _) = self.add_state(Vec::new(), false);
            self.table.states[state_id].advance_actions.push((
                CharacterSet::empty().add_char('\0'),
                AdvanceAction {
                    state: Some(next_state_id),
                    in_main_token: true,
                },
            ));
        }

        for transition in transitions {
            // Skip transitions that lose to an already-completed token.
            if let Some((completed_id, completed_precedence)) = completion {
                if !TokenConflictMap::prefer_transition(
                    &self.lexical_grammar,
                    &transition,
                    completed_id,
                    completed_precedence,
                    has_sep,
                ) {
                    continue;
                }
            }

            // EOF validity only propagates across separator transitions.
            let (next_state_id, _) =
                self.add_state(transition.states, eof_valid && transition.is_separator);
            // A self-transition is encoded as `state: None`.
            let next_state = if next_state_id == state_id {
                None
            } else {
                Some(next_state_id)
            };
            self.table.states[state_id].advance_actions.push((
                transition.characters,
                AdvanceAction {
                    state: next_state,
                    in_main_token: !transition.is_separator,
                },
            ));
        }

        // Record the accept action: the preferred completed token, or EOF
        // when this is the empty (EOF) state.
        if let Some((complete_id, _)) = completion {
            self.table.states[state_id].accept_action = Some(Symbol::terminal(complete_id));
        } else if self.cursor.state_ids.is_empty() {
            self.table.states[state_id].accept_action = Some(Symbol::end());
        }
    }
}
|
||||
|
||||
/// Try to merge `other` into `tokens`. The merge is allowed only if, for
/// every token present in exactly one of the two sets, that token neither
/// lexically conflicts with nor fails to co-occur (per the coincident-token
/// index) with every token of the set that lacks it.
///
/// On success, `tokens` is extended with `other`'s members and `true` is
/// returned; on failure `tokens` is left unchanged and `false` is returned.
fn merge_token_set(
    tokens: &mut TokenSet,
    other: &TokenSet,
    lexical_grammar: &LexicalGrammar,
    token_conflict_map: &TokenConflictMap,
    coincident_token_index: &CoincidentTokenIndex,
) -> bool {
    for i in 0..lexical_grammar.variables.len() {
        let symbol = Symbol::terminal(i);
        // Pick whichever set does NOT contain token `i`; if both or neither
        // contain it there is nothing new to check for this token.
        let set_without_terminal = match (tokens.contains_terminal(i), other.contains_terminal(i)) {
            (true, false) => other,
            (false, true) => tokens,
            _ => continue,
        };

        // Token `i` would become newly valid alongside each token of the
        // other set; reject the merge on any conflict or non-coincidence.
        for existing_token in set_without_terminal.terminals() {
            if token_conflict_map.does_conflict(i, existing_token.index)
                || !coincident_token_index.contains(symbol, existing_token)
            {
                return false;
            }
        }
    }

    tokens.insert_all(other);
    true
}
|
||||
|
||||
/// Deduplicate identical lex states, remapping all references.
///
/// Runs a fixed-point loop: whenever two non-replaced states compare equal,
/// the higher-indexed one is marked as replaced by the lower-indexed one,
/// and all advance actions are re-pointed — which can make further states
/// become equal on the next pass. Finally, replaced states are removed and
/// every remaining state id (in both the lex table and the parse table's
/// `lex_state_id`s) is compacted to account for the removals.
fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
    // Map from removed state id -> surviving (lower) state id.
    let mut state_replacements = BTreeMap::new();
    let mut done = false;
    while !done {
        done = true;
        for (i, state_i) in table.states.iter().enumerate() {
            if state_replacements.contains_key(&i) {
                continue;
            }
            // Only compare against lower-indexed states (j < i), so the
            // surviving state is always the one with the smaller id.
            for (j, state_j) in table.states.iter().enumerate() {
                if j == i {
                    break;
                }
                if state_replacements.contains_key(&j) {
                    continue;
                }
                if state_i == state_j {
                    info!("replace state {} with state {}", i, j);
                    state_replacements.insert(i, j);
                    done = false;
                    break;
                }
            }
        }
        // Re-point advance actions at the surviving states; this may make
        // more states identical, hence the outer fixed-point loop.
        for state in table.states.iter_mut() {
            for (_, advance_action) in state.advance_actions.iter_mut() {
                advance_action.state = advance_action
                    .state
                    .map(|s| state_replacements.get(&s).cloned().unwrap_or(s))
            }
        }
    }

    // Compute each old state id's final id after the replaced states are
    // deleted: follow the replacement, then subtract the number of removed
    // states with a smaller id.
    let final_state_replacements = (0..table.states.len())
        .into_iter()
        .map(|state_id| {
            let replacement = state_replacements
                .get(&state_id)
                .cloned()
                .unwrap_or(state_id);
            let prior_removed = state_replacements
                .iter()
                .take_while(|i| *i.0 < replacement)
                .count();
            replacement - prior_removed
        })
        .collect::<Vec<_>>();

    // Update the parse table's references to lex states.
    for state in parse_table.states.iter_mut() {
        state.lex_state_id = final_state_replacements[state.lex_state_id];
    }

    // Update the lex table's own internal transitions.
    for state in table.states.iter_mut() {
        for (_, advance_action) in state.advance_actions.iter_mut() {
            advance_action.state = advance_action.state.map(|s| final_state_replacements[s]);
        }
    }

    // Drop the replaced states, preserving order of the survivors.
    let mut i = 0;
    table.states.retain(|_| {
        let result = !state_replacements.contains_key(&i);
        i += 1;
        result
    });
}
|
||||
750
cli/src/generate/build_tables/build_parse_table.rs
Normal file
750
cli/src/generate/build_tables/build_parse_table.rs
Normal file
|
|
@@ -0,0 +1,750 @@
|
|||
use super::item::{ParseItem, ParseItemSet, TokenSet};
|
||||
use super::item_set_builder::ParseItemSetBuilder;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{
|
||||
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
|
||||
};
|
||||
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
|
||||
use crate::generate::tables::{
|
||||
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::VecDeque;
|
||||
use std::u32;
|
||||
|
||||
use std::fmt::Write;
|
||||
use std::hash::Hasher;
|
||||
|
||||
/// Records where an auxiliary non-terminal (a generated repeat symbol) is
/// used: the auxiliary symbol itself and the visible (non-auxiliary) parent
/// symbols whose productions reference it. Used during conflict resolution
/// to report conflicts in terms of user-visible rules.
#[derive(Clone)]
struct AuxiliarySymbolInfo {
    auxiliary_symbol: Symbol,
    parent_symbols: Vec<Symbol>,
}
|
||||
|
||||
// The sequence of symbols consumed on the path leading to a parse state,
// and the auxiliary (repeat) symbols encountered along that path.
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;

/// A parse state whose actions have not yet been computed, together with
/// the path context needed for conflict reporting.
struct ParseStateQueueEntry {
    preceding_symbols: SymbolSequence,
    preceding_auxiliary_symbols: AuxiliarySymbolSequence,
    state_id: ParseStateId,
}
|
||||
|
||||
/// Builds the parse table from the grammar: creates one parse state per
/// distinct item set, computes shift/reduce actions, and resolves (or
/// reports) conflicts.
struct ParseTableBuilder<'a> {
    // Computes transitive closures and FIRST sets of parse item sets.
    item_set_builder: ParseItemSetBuilder<'a>,
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    // Interning map: item set -> parse state id.
    state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
    // Inverse of the above, indexed by state id.
    item_sets_by_state_id: Vec<ParseItemSet<'a>>,
    // States created but whose actions have not yet been computed.
    parse_state_queue: VecDeque<ParseStateQueueEntry>,
    // The table being built.
    parse_table: ParseTable,
    // State ids for which debug info should be printed during the build.
    state_ids_to_log: Vec<ParseStateId>,
}
|
||||
|
||||
impl<'a> ParseTableBuilder<'a> {
    /// Drive the whole construction: seed the error state (index 0) and the
    /// start state (index 1), then process queued states until none remain,
    /// computing each state's actions from the transitive closure of its
    /// item set. Precedence/associativity annotations are stripped from the
    /// finished table at the end.
    fn build(mut self) -> Result<ParseTable> {
        // Ensure that the empty alias sequence has index 0.
        self.parse_table.alias_sequences.push(Vec::new());

        // Add the error state at index 0.
        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());

        // Add the starting state at index 1.
        self.add_parse_state(
            &Vec::new(),
            &Vec::new(),
            ParseItemSet::with(
                [(
                    ParseItem::start(),
                    [Symbol::end()].iter().cloned().collect(),
                )]
                .iter()
                .cloned(),
            ),
        );

        while let Some(entry) = self.parse_state_queue.pop_front() {
            let item_set = self
                .item_set_builder
                .transitive_closure(&self.item_sets_by_state_id[entry.state_id]);

            // Debug output for states the user asked to log.
            if self.state_ids_to_log.contains(&entry.state_id) {
                eprintln!(
                    "state: {}\n\ninitial item set:\n\n{}closed item set:\n\n{}",
                    entry.state_id,
                    super::item::ParseItemSetDisplay(
                        &self.item_sets_by_state_id[entry.state_id],
                        self.syntax_grammar,
                        self.lexical_grammar,
                    ),
                    super::item::ParseItemSetDisplay(
                        &item_set,
                        self.syntax_grammar,
                        self.lexical_grammar,
                    )
                );
            }

            self.add_actions(
                entry.preceding_symbols,
                entry.preceding_auxiliary_symbols,
                entry.state_id,
                item_set,
            )?;
        }

        self.remove_precedences();

        Ok(self.parse_table)
    }

    /// Intern a parse state for the given item set. Newly created states
    /// are pushed onto the queue (with their path context) to have their
    /// actions computed later. Returns the state id.
    fn add_parse_state(
        &mut self,
        preceding_symbols: &SymbolSequence,
        preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
        item_set: ParseItemSet<'a>,
    ) -> ParseStateId {
        // Hash only the unfinished items; this signature is stored on the
        // state (presumably used later for state comparison/merging — it is
        // not read within this file).
        let mut hasher = DefaultHasher::new();
        item_set.hash_unfinished_items(&mut hasher);
        let unfinished_item_signature = hasher.finish();

        match self.state_ids_by_item_set.entry(item_set) {
            Entry::Occupied(o) => *o.get(),
            Entry::Vacant(v) => {
                let state_id = self.parse_table.states.len();
                self.item_sets_by_state_id.push(v.key().clone());
                self.parse_table.states.push(ParseState {
                    lex_state_id: 0,
                    terminal_entries: HashMap::new(),
                    nonterminal_entries: HashMap::new(),
                    unfinished_item_signature,
                });
                self.parse_state_queue.push_back(ParseStateQueueEntry {
                    state_id,
                    preceding_symbols: preceding_symbols.clone(),
                    preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
                });
                v.insert(state_id);
                state_id
            }
        }
    }

    /// Compute all actions for one parse state from its closed item set:
    /// reductions (and Accept) for finished items, shifts and goto entries
    /// for unfinished items, then conflict resolution and ShiftExtra entries
    /// for the grammar's extra tokens.
    fn add_actions(
        &mut self,
        mut preceding_symbols: SymbolSequence,
        mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
        state_id: ParseStateId,
        item_set: ParseItemSet<'a>,
    ) -> Result<()> {
        let mut terminal_successors = HashMap::new();
        let mut non_terminal_successors = HashMap::new();
        let mut lookaheads_with_conflicts = HashSet::new();

        for (item, lookaheads) in &item_set.entries {
            if let Some(next_symbol) = item.symbol() {
                // Unfinished item: group its successor item by the symbol
                // that advances it.
                let successor = item.successor();
                if next_symbol.is_non_terminal() {
                    // Keep track of where auxiliary non-terminals (repeat symbols) are
                    // used within visible symbols. This information may be needed later
                    // for conflict resolution.
                    if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
                        preceding_auxiliary_symbols
                            .push(self.get_auxiliary_node_info(&item_set, next_symbol));
                    }

                    non_terminal_successors
                        .entry(next_symbol)
                        .or_insert_with(|| ParseItemSet::default())
                        .insert(successor, lookaheads);
                } else {
                    terminal_successors
                        .entry(next_symbol)
                        .or_insert_with(|| ParseItemSet::default())
                        .insert(successor, lookaheads);
                }
            } else {
                // Finished item: emit an Accept (for the augmented start
                // item) or a Reduce for each of its lookaheads.
                let action = if item.is_augmented() {
                    ParseAction::Accept
                } else {
                    ParseAction::Reduce {
                        symbol: Symbol::non_terminal(item.variable_index as usize),
                        child_count: item.step_index as usize,
                        precedence: item.precedence(),
                        associativity: item.associativity(),
                        dynamic_precedence: item.production.dynamic_precedence,
                        alias_sequence_id: self.get_alias_sequence_id(item),
                    }
                };

                for lookahead in lookaheads.iter() {
                    let entry = self.parse_table.states[state_id]
                        .terminal_entries
                        .entry(lookahead);
                    let entry = entry.or_insert_with(|| ParseTableEntry::new());
                    // Keep only the highest-precedence reductions; equal
                    // precedence means a genuine conflict to resolve later.
                    if entry.actions.is_empty() {
                        entry.actions.push(action);
                    } else if action.precedence() > entry.actions[0].precedence() {
                        entry.actions.clear();
                        entry.actions.push(action);
                        lookaheads_with_conflicts.remove(&lookahead);
                    } else if action.precedence() == entry.actions[0].precedence() {
                        entry.actions.push(action);
                        lookaheads_with_conflicts.insert(lookahead);
                    }
                }
            }
        }

        // Create successor states for terminal transitions and add Shift
        // actions. A Shift added on top of existing reductions marks a
        // shift/reduce conflict.
        for (symbol, next_item_set) in terminal_successors {
            preceding_symbols.push(symbol);
            let next_state_id = self.add_parse_state(
                &preceding_symbols,
                &preceding_auxiliary_symbols,
                next_item_set,
            );
            preceding_symbols.pop();

            let entry = self.parse_table.states[state_id]
                .terminal_entries
                .entry(symbol);
            if let Entry::Occupied(e) = &entry {
                if !e.get().actions.is_empty() {
                    lookaheads_with_conflicts.insert(symbol);
                }
            }

            entry
                .or_insert_with(|| ParseTableEntry::new())
                .actions
                .push(ParseAction::Shift {
                    state: next_state_id,
                    is_repetition: false,
                });
        }

        // Create successor states for non-terminal (goto) transitions.
        for (symbol, next_item_set) in non_terminal_successors {
            preceding_symbols.push(symbol);
            let next_state_id = self.add_parse_state(
                &preceding_symbols,
                &preceding_auxiliary_symbols,
                next_item_set,
            );
            preceding_symbols.pop();
            self.parse_table.states[state_id]
                .nonterminal_entries
                .insert(symbol, next_state_id);
        }

        // Resolve every conflicting lookahead, erroring out if a conflict
        // cannot be resolved and is not expected.
        for symbol in lookaheads_with_conflicts {
            self.handle_conflict(
                &item_set,
                state_id,
                &preceding_symbols,
                &preceding_auxiliary_symbols,
                symbol,
            )?;
        }

        // Extra tokens (e.g. whitespace/comments) are shiftable everywhere
        // they don't already have an entry.
        let state = &mut self.parse_table.states[state_id];
        for extra_token in &self.syntax_grammar.extra_tokens {
            state
                .terminal_entries
                .entry(*extra_token)
                .or_insert(ParseTableEntry {
                    reusable: true,
                    actions: vec![ParseAction::ShiftExtra],
                });
        }

        Ok(())
    }

    /// Resolve one conflicting lookahead in a state. Resolution order:
    /// repetition ambiguity (marked, kept), precedence, associativity,
    /// declared expected conflicts. If none apply, returns an `Err` with a
    /// detailed human-readable explanation and suggested resolutions.
    fn handle_conflict(
        &mut self,
        item_set: &ParseItemSet,
        state_id: ParseStateId,
        preceding_symbols: &SymbolSequence,
        preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
        conflicting_lookahead: Symbol,
    ) -> Result<()> {
        let entry = self.parse_table.states[state_id]
            .terminal_entries
            .get_mut(&conflicting_lookahead)
            .unwrap();

        // Determine which items in the set conflict with each other, and the
        // precedences associated with SHIFT vs REDUCE actions. There won't
        // be multiple REDUCE actions with different precedences; that is
        // sorted out ahead of time in `add_actions`. But there can still be
        // REDUCE-REDUCE conflicts where all actions have the *same*
        // precedence, and there can still be SHIFT/REDUCE conflicts.
        let reduce_precedence = entry.actions[0].precedence();
        let mut considered_associativity = false;
        let mut shift_precedence: Option<Range<i32>> = None;
        let mut conflicting_items = HashSet::new();
        for (item, lookaheads) in &item_set.entries {
            if let Some(step) = item.step() {
                if item.step_index > 0 {
                    // Unfinished item whose next symbol can start with the
                    // conflicting lookahead: contributes to the SHIFT side.
                    if self
                        .item_set_builder
                        .first_set(&step.symbol)
                        .contains(&conflicting_lookahead)
                    {
                        if item.variable_index != u32::MAX {
                            conflicting_items.insert(item);
                        }

                        // Widen the range of precedences seen among the
                        // shiftable items.
                        let precedence = item.precedence();
                        if let Some(range) = &mut shift_precedence {
                            if precedence < range.start {
                                range.start = precedence;
                            } else if precedence > range.end {
                                range.end = precedence;
                            }
                        } else {
                            shift_precedence = Some(precedence..precedence);
                        }
                    }
                }
            } else if lookaheads.contains(&conflicting_lookahead) {
                // Finished item reducing on this lookahead: REDUCE side.
                if item.variable_index != u32::MAX {
                    conflicting_items.insert(item);
                }
            }
        }

        // A SHIFT action, if present, is always the last action in the entry.
        if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
            let shift_precedence = shift_precedence.unwrap_or(0..0);

            // If all of the items in the conflict have the same parent symbol,
            // and that parent symbols is auxiliary, then this is just the intentional
            // ambiguity associated with a repeat rule. Resolve that class of ambiguity
            // by leaving it in the parse table, but marking the SHIFT action with
            // an `is_repetition` flag.
            let conflicting_variable_index =
                conflicting_items.iter().next().unwrap().variable_index;
            if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() {
                if conflicting_items
                    .iter()
                    .all(|item| item.variable_index == conflicting_variable_index)
                {
                    *is_repetition = true;
                    return Ok(());
                }
            }

            // If the SHIFT action has higher precedence, remove all the REDUCE actions.
            if shift_precedence.start > reduce_precedence
                || (shift_precedence.start == reduce_precedence
                    && shift_precedence.end > reduce_precedence)
            {
                entry.actions.drain(0..entry.actions.len() - 1);
            }
            // If the REDUCE actions have higher precedence, remove the SHIFT action.
            else if shift_precedence.end < reduce_precedence
                || (shift_precedence.end == reduce_precedence
                    && shift_precedence.start < reduce_precedence)
            {
                entry.actions.pop();
                conflicting_items.retain(|item| item.is_done());
            }
            // If the SHIFT and REDUCE actions have the same predence, consider
            // the REDUCE actions' associativity.
            else if shift_precedence == (reduce_precedence..reduce_precedence) {
                considered_associativity = true;
                let mut has_left = false;
                let mut has_right = false;
                let mut has_non = false;
                for action in &entry.actions {
                    if let ParseAction::Reduce { associativity, .. } = action {
                        match associativity {
                            Some(Associativity::Left) => has_left = true,
                            Some(Associativity::Right) => has_right = true,
                            None => has_non = true,
                        }
                    }
                }

                // If all reduce actions are left associative, remove the SHIFT action.
                // If all reduce actions are right associative, remove the REDUCE actions.
                match (has_left, has_non, has_right) {
                    (true, false, false) => {
                        entry.actions.pop();
                        conflicting_items.retain(|item| item.is_done());
                    }
                    (false, false, true) => {
                        entry.actions.drain(0..entry.actions.len() - 1);
                    }
                    _ => {}
                }
            }
        }

        // If all of the actions but one have been eliminated, then there's no problem.
        let entry = self.parse_table.states[state_id]
            .terminal_entries
            .get_mut(&conflicting_lookahead)
            .unwrap();
        if entry.actions.len() == 1 {
            return Ok(());
        }

        // Determine the set of parent symbols involved in this conflict.
        // Auxiliary (repeat) symbols are replaced by their visible parent
        // symbols, recorded earlier along the path to this state.
        let mut actual_conflict = Vec::new();
        for item in &conflicting_items {
            let symbol = Symbol::non_terminal(item.variable_index as usize);
            if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
                actual_conflict.extend(
                    preceding_auxiliary_symbols
                        .iter()
                        .rev()
                        .find_map(|info| {
                            if info.auxiliary_symbol == symbol {
                                Some(&info.parent_symbols)
                            } else {
                                None
                            }
                        })
                        .unwrap()
                        .iter(),
                );
            } else {
                actual_conflict.push(symbol);
            }
        }
        actual_conflict.sort_unstable();
        actual_conflict.dedup();

        // If this set of symbols has been whitelisted, then there's no error.
        if self
            .syntax_grammar
            .expected_conflicts
            .contains(&actual_conflict)
        {
            return Ok(());
        }

        // Build the error message: the symbol sequence leading here, each
        // possible interpretation of it, and suggested resolutions.
        let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
        for symbol in preceding_symbols {
            write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
        }

        write!(
            &mut msg,
            " • {} …\n\n",
            self.symbol_name(&conflicting_lookahead)
        )
        .unwrap();
        write!(&mut msg, "Possible interpretations:\n\n").unwrap();

        // One (line, optional precedence annotation) per conflicting item.
        let interpretions = conflicting_items
            .iter()
            .enumerate()
            .map(|(i, item)| {
                let mut line = String::new();
                write!(&mut line, " {}:", i + 1).unwrap();

                for preceding_symbol in preceding_symbols
                    .iter()
                    .take(preceding_symbols.len() - item.step_index as usize)
                {
                    write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
                }

                write!(
                    &mut line,
                    " ({}",
                    &self.syntax_grammar.variables[item.variable_index as usize].name
                )
                .unwrap();

                // Print the production with a • marking the item's position.
                for (j, step) in item.production.steps.iter().enumerate() {
                    if j as u32 == item.step_index {
                        write!(&mut line, " •").unwrap();
                    }
                    write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
                }

                write!(&mut line, ")").unwrap();

                if item.is_done() {
                    write!(
                        &mut line,
                        " • {} …",
                        self.symbol_name(&conflicting_lookahead)
                    )
                    .unwrap();
                }

                let precedence = item.precedence();
                let associativity = item.associativity();

                let prec_line = if let Some(associativity) = associativity {
                    Some(format!(
                        "(precedence: {}, associativity: {:?})",
                        precedence, associativity
                    ))
                } else if precedence > 0 {
                    Some(format!("(precedence: {})", precedence))
                } else {
                    None
                };

                (line, prec_line)
            })
            .collect::<Vec<_>>();

        // Align the precedence annotations in a column past the longest line.
        let max_interpretation_length = interpretions
            .iter()
            .map(|i| i.0.chars().count())
            .max()
            .unwrap();

        for (line, prec_suffix) in interpretions {
            msg += &line;
            if let Some(prec_suffix) = prec_suffix {
                for _ in line.chars().count()..max_interpretation_length {
                    msg.push(' ');
                }
                msg += " ";
                msg += &prec_suffix;
            }
            msg.push('\n');
        }

        // Suggest resolutions: precedence, associativity, declared conflict.
        let mut resolution_count = 0;
        write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
        let shift_items = conflicting_items
            .iter()
            .filter(|i| !i.is_done())
            .cloned()
            .collect::<Vec<_>>();
        if actual_conflict.len() > 1 {
            if shift_items.len() > 0 {
                resolution_count += 1;
                write!(
                    &mut msg,
                    " {}: Specify a higher precedence in",
                    resolution_count
                )
                .unwrap();
                for (i, item) in shift_items.iter().enumerate() {
                    if i > 0 {
                        write!(&mut msg, " and").unwrap();
                    }
                    write!(
                        &mut msg,
                        " `{}`",
                        self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
                    )
                    .unwrap();
                }
                write!(&mut msg, " than in the other rules.\n").unwrap();
            }

            for item in &conflicting_items {
                if item.is_done() {
                    resolution_count += 1;
                    write!(
                        &mut msg,
                        " {}: Specify a higher precedence in `{}` than in the other rules.\n",
                        resolution_count,
                        self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
                    )
                    .unwrap();
                }
            }
        }

        if considered_associativity {
            resolution_count += 1;
            write!(
                &mut msg,
                " {}: Specify a left or right associativity in ",
                resolution_count
            )
            .unwrap();
            for (i, item) in conflicting_items.iter().filter(|i| i.is_done()).enumerate() {
                if i > 0 {
                    write!(&mut msg, " and ").unwrap();
                }
                write!(
                    &mut msg,
                    "`{}`",
                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
                )
                .unwrap();
            }
            write!(&mut msg, "\n").unwrap();
        }

        resolution_count += 1;
        write!(
            &mut msg,
            " {}: Add a conflict for these rules: ",
            resolution_count
        )
        .unwrap();
        for (i, symbol) in actual_conflict.iter().enumerate() {
            if i > 0 {
                write!(&mut msg, ", ").unwrap();
            }
            write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
        }
        write!(&mut msg, "\n").unwrap();

        Err(Error(msg))
    }

    /// For an auxiliary symbol used in `item_set`, collect the non-auxiliary
    /// variables whose items are about to consume it — these are the
    /// user-visible "parents" used when reporting conflicts.
    fn get_auxiliary_node_info(
        &self,
        item_set: &ParseItemSet,
        symbol: Symbol,
    ) -> AuxiliarySymbolInfo {
        let parent_symbols = item_set
            .entries
            .iter()
            .filter_map(|(item, _)| {
                let variable_index = item.variable_index as usize;
                if item.symbol() == Some(symbol)
                    && !self.syntax_grammar.variables[variable_index].is_auxiliary()
                {
                    Some(Symbol::non_terminal(variable_index))
                } else {
                    None
                }
            })
            .collect();
        AuxiliarySymbolInfo {
            auxiliary_symbol: symbol,
            parent_symbols,
        }
    }

    /// Zero out the precedence/associativity fields of every Reduce action.
    /// They were only needed during conflict resolution; clearing them makes
    /// otherwise-identical actions compare equal in the final table.
    fn remove_precedences(&mut self) {
        for state in self.parse_table.states.iter_mut() {
            for (_, entry) in state.terminal_entries.iter_mut() {
                for action in entry.actions.iter_mut() {
                    match action {
                        ParseAction::Reduce {
                            precedence,
                            associativity,
                            ..
                        } => {
                            *precedence = 0;
                            *associativity = None;
                        }
                        _ => {}
                    }
                }
            }
        }
    }

    /// Intern the alias sequence of an item's production (trailing `None`s
    /// trimmed) in the parse table, returning its id. Also tracks the
    /// longest aliased production seen.
    fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
        let mut alias_sequence: Vec<Option<Alias>> = item
            .production
            .steps
            .iter()
            .map(|s| s.alias.clone())
            .collect();
        // Trim trailing steps with no alias; the empty sequence thus maps
        // to index 0 (seeded in `build`).
        while alias_sequence.last() == Some(&None) {
            alias_sequence.pop();
        }
        if item.production.steps.len() > self.parse_table.max_aliased_production_length {
            self.parse_table.max_aliased_production_length = item.production.steps.len()
        }
        // Linear-scan interning over the (small) list of sequences.
        if let Some(index) = self
            .parse_table
            .alias_sequences
            .iter()
            .position(|seq| *seq == alias_sequence)
        {
            index
        } else {
            self.parse_table.alias_sequences.push(alias_sequence);
            self.parse_table.alias_sequences.len() - 1
        }
    }

    /// Human-readable name for a symbol, for error messages: anonymous
    /// terminals are quoted, EOF is spelled "EOF".
    fn symbol_name(&self, symbol: &Symbol) -> String {
        match symbol.kind {
            SymbolType::End => "EOF".to_string(),
            SymbolType::External => self.syntax_grammar.external_tokens[symbol.index]
                .name
                .clone(),
            SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(),
            SymbolType::Terminal => {
                let variable = &self.lexical_grammar.variables[symbol.index];
                if variable.kind == VariableType::Named {
                    variable.name.clone()
                } else {
                    format!("'{}'", &variable.name)
                }
            }
        }
    }
}
|
||||
|
||||
/// For every terminal token, record which tokens can immediately follow it.
///
/// Walks each adjacent pair of steps in every production (including inlined
/// productions): the tokens that can END the left step (its LAST set) are
/// followed by the tokens that can START the right step (its FIRST set).
/// `result` is indexed by terminal index and extended in place.
fn populate_following_tokens(
    result: &mut Vec<TokenSet>,
    grammar: &SyntaxGrammar,
    inlines: &InlinedProductionMap,
    builder: &ParseItemSetBuilder,
) {
    let productions = grammar
        .variables
        .iter()
        .flat_map(|v| &v.productions)
        .chain(&inlines.productions);
    for production in productions {
        for i in 1..production.steps.len() {
            let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
            let right_tokens = builder.first_set(&production.steps[i].symbol);
            for left_token in left_tokens.iter() {
                // Only terminals are tracked in `result`.
                if left_token.is_terminal() {
                    result[left_token.index].insert_all_terminals(right_tokens);
                }
            }
        }
    }
}
|
||||
|
||||
/// Public entry point: build the parse table for a grammar.
///
/// Returns the finished `ParseTable` together with, for each terminal, the
/// set of tokens that can follow it (used later when building lexer tables).
/// `state_ids_to_log` selects parse states whose item sets are printed to
/// stderr during construction, for debugging.
pub(crate) fn build_parse_table(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    inlines: &InlinedProductionMap,
    state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, Vec<TokenSet>)> {
    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
    // One following-token set per terminal in the lexical grammar.
    let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
    populate_following_tokens(
        &mut following_tokens,
        syntax_grammar,
        inlines,
        &item_set_builder,
    );

    let table = ParseTableBuilder {
        syntax_grammar,
        lexical_grammar,
        state_ids_to_log,
        item_set_builder,
        state_ids_by_item_set: HashMap::new(),
        item_sets_by_state_id: Vec::new(),
        parse_state_queue: VecDeque::new(),
        parse_table: ParseTable {
            states: Vec::new(),
            symbols: Vec::new(),
            alias_sequences: Vec::new(),
            max_aliased_production_length: 0,
        },
    }
    .build()?;

    Ok((table, following_tokens))
}
|
||||
75
cli/src/generate/build_tables/coincident_tokens.rs
Normal file
75
cli/src/generate/build_tables/coincident_tokens.rs
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
use crate::generate::grammars::LexicalGrammar;
|
||||
use crate::generate::rules::Symbol;
|
||||
use crate::generate::tables::{ParseStateId, ParseTable};
|
||||
use std::fmt;
|
||||
|
||||
/// Records, for every pair of terminal tokens, the parse states in which both
/// tokens are valid lookaheads ("coincident" tokens).
pub(crate) struct CoincidentTokenIndex<'a> {
    // Flattened symmetric n x n matrix of parse-state-id lists,
    // addressed via `index()`.
    entries: Vec<Vec<ParseStateId>>,
    grammar: &'a LexicalGrammar,
    // Number of tokens in the lexical grammar (the matrix dimension).
    n: usize,
}
|
||||
|
||||
impl<'a> CoincidentTokenIndex<'a> {
    /// Scan every parse state in the table, recording each state's id under
    /// every pair of terminals that are both valid lookaheads in that state.
    pub fn new(table: &ParseTable, lexical_grammar: &'a LexicalGrammar) -> Self {
        let n = lexical_grammar.variables.len();
        let mut result = Self {
            n,
            grammar: lexical_grammar,
            entries: vec![Vec::new(); n * n],
        };
        for (i, state) in table.states.iter().enumerate() {
            for symbol in state.terminal_entries.keys() {
                if symbol.is_terminal() {
                    for other_symbol in state.terminal_entries.keys() {
                        if other_symbol.is_terminal() {
                            let index = result.index(symbol.index, other_symbol.index);
                            // States are visited in increasing id order, so a
                            // duplicate id can only be the last one pushed.
                            if result.entries[index].last().cloned() != Some(i) {
                                result.entries[index].push(i);
                            }
                        }
                    }
                }
            }
        }
        result
    }

    /// Ids of all parse states in which both `a` and `b` are valid lookaheads.
    pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec<ParseStateId> {
        &self.entries[self.index(a.index, b.index)]
    }

    /// True if `a` and `b` are both valid lookaheads in at least one state.
    pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
        !self.entries[self.index(a.index, b.index)].is_empty()
    }

    // Symmetric flattened index: (a, b) and (b, a) map to the same slot, so
    // only the upper triangle of the matrix is ever populated.
    fn index(&self, a: usize, b: usize) -> usize {
        if a < b {
            a * self.n + b
        } else {
            b * self.n + a
        }
    }
}
|
||||
|
||||
impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
    /// Render the index as a nested map from token name to token name to the
    /// *count* of coincident states (not the state ids themselves).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "CoincidentTokenIndex {{\n")?;

        write!(f, "  entries: {{\n")?;
        for i in 0..self.n {
            write!(f, "    {}: {{\n", self.grammar.variables[i].name)?;
            for j in 0..self.n {
                write!(
                    f,
                    "      {}: {:?},\n",
                    self.grammar.variables[j].name,
                    self.entries[self.index(i, j)].len()
                )?;
            }
            write!(f, "    }},\n")?;
        }
        write!(f, "  }},")?;
        write!(f, "}}")?;
        Ok(())
    }
}
|
||||
496
cli/src/generate/build_tables/item.rs
Normal file
496
cli/src/generate/build_tables/item.rs
Normal file
|
|
@ -0,0 +1,496 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
|
||||
use crate::generate::rules::Associativity;
|
||||
use crate::generate::rules::{Symbol, SymbolType};
|
||||
use lazy_static::lazy_static;
|
||||
use smallbitvec::SmallBitVec;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::iter::FromIterator;
|
||||
use std::u32;
|
||||
|
||||
lazy_static! {
    // The synthetic production used for the augmented start item
    // (`ParseItem::start()`): a single step referencing non-terminal 0
    // (the grammar's start symbol) with no precedence, associativity,
    // or alias.
    static ref START_PRODUCTION: Production = Production {
        dynamic_precedence: 0,
        steps: vec![ProductionStep {
            symbol: Symbol {
                index: 0,
                kind: SymbolType::NonTerminal,
            },
            precedence: 0,
            associativity: None,
            alias: None,
        }],
    };
}
|
||||
|
||||
/// A compact set of terminal symbols, stored as bit vectors.
/// Non-terminals cannot be stored (see `insert`/`contains`, which panic).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct TokenSet {
    // Bit i set => terminal with index i is in the set.
    terminal_bits: SmallBitVec,
    // Bit i set => external token with index i is in the set.
    external_bits: SmallBitVec,
    // Whether the end-of-file marker is in the set.
    eof: bool,
}
|
||||
|
||||
/// An LR parse item: a position (`step_index`) within one production of the
/// variable `variable_index`. Equality, ordering, and hashing are implemented
/// manually below to compare production *contents*, not pointers.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
    // Index of the syntax variable; u32::MAX marks the augmented start item.
    pub variable_index: u32,
    // Position of the dot within the production's steps.
    pub step_index: u32,
    pub production: &'a Production,
}
|
||||
|
||||
/// A set of parse items, each paired with its lookahead tokens.
/// Entries are kept sorted by item (see `insert`, which uses binary search).
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet<'a> {
    pub entries: Vec<(ParseItem<'a>, TokenSet)>,
}
|
||||
|
||||
/// Wrapper that renders a `ParseItem` in a human-readable form, resolving
/// symbol indices to names via the syntax and lexical grammars.
pub(crate) struct ParseItemDisplay<'a>(
    pub &'a ParseItem<'a>,
    pub &'a SyntaxGrammar,
    pub &'a LexicalGrammar,
);

/// Wrapper that renders a `TokenSet` as a bracketed list of token names.
pub(crate) struct TokenSetDisplay<'a>(
    pub &'a TokenSet,
    pub &'a SyntaxGrammar,
    pub &'a LexicalGrammar,
);

/// Wrapper that renders a full `ParseItemSet`, one item per line with its
/// lookaheads. Kept for debugging even when not referenced.
#[allow(dead_code)]
pub(crate) struct ParseItemSetDisplay<'a>(
    pub &'a ParseItemSet<'a>,
    pub &'a SyntaxGrammar,
    pub &'a LexicalGrammar,
);
|
||||
|
||||
impl TokenSet {
    /// An empty token set.
    pub fn new() -> Self {
        Self {
            terminal_bits: SmallBitVec::new(),
            external_bits: SmallBitVec::new(),
            eof: false,
        }
    }

    /// Iterate over every symbol in the set: terminals first, then external
    /// tokens, then (if present) the end-of-file symbol.
    pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
        self.terminal_bits
            .iter()
            .enumerate()
            .filter_map(|(i, value)| {
                if value {
                    Some(Symbol::terminal(i))
                } else {
                    None
                }
            })
            .chain(
                self.external_bits
                    .iter()
                    .enumerate()
                    .filter_map(|(i, value)| {
                        if value {
                            Some(Symbol::external(i))
                        } else {
                            None
                        }
                    }),
            )
            .chain(if self.eof { Some(Symbol::end()) } else { None })
    }

    /// Iterate over only the terminal symbols in the set.
    pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
        self.terminal_bits
            .iter()
            .enumerate()
            .filter_map(|(i, value)| {
                if value {
                    Some(Symbol::terminal(i))
                } else {
                    None
                }
            })
    }

    /// Membership test. Panics if `symbol` is a non-terminal, which this set
    /// cannot represent.
    pub fn contains(&self, symbol: &Symbol) -> bool {
        match symbol.kind {
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
            SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
            SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
            SymbolType::End => self.eof,
        }
    }

    /// Membership test for a terminal by raw index; out-of-range is `false`.
    pub fn contains_terminal(&self, index: usize) -> bool {
        self.terminal_bits.get(index).unwrap_or(false)
    }

    /// Add one symbol, growing the backing bit vector as needed.
    /// Panics if `other` is a non-terminal.
    pub fn insert(&mut self, other: Symbol) {
        let vec = match other.kind {
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
            SymbolType::Terminal => &mut self.terminal_bits,
            SymbolType::External => &mut self.external_bits,
            SymbolType::End => {
                self.eof = true;
                return;
            }
        };
        if other.index >= vec.len() {
            vec.resize(other.index + 1, false);
        }
        vec.set(other.index, true);
    }

    /// Union in all terminals from `other`. Returns `true` if any symbol was
    /// newly added (used by callers to detect fixpoint).
    pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
        let mut result = false;
        if other.terminal_bits.len() > self.terminal_bits.len() {
            self.terminal_bits.resize(other.terminal_bits.len(), false);
        }
        for (i, element) in other.terminal_bits.iter().enumerate() {
            if element {
                result |= !self.terminal_bits[i];
                self.terminal_bits.set(i, element);
            }
        }
        result
    }

    /// Union in all external tokens from `other`. Returns `true` if any
    /// symbol was newly added.
    fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
        let mut result = false;
        if other.external_bits.len() > self.external_bits.len() {
            self.external_bits.resize(other.external_bits.len(), false);
        }
        for (i, element) in other.external_bits.iter().enumerate() {
            if element {
                result |= !self.external_bits[i];
                self.external_bits.set(i, element);
            }
        }
        result
    }

    /// Union in everything from `other` (terminals, externals, and EOF).
    /// Returns `true` if any symbol was newly added.
    pub fn insert_all(&mut self, other: &TokenSet) -> bool {
        let mut result = false;
        if other.eof {
            result |= !self.eof;
            self.eof = true;
        }
        result |= self.insert_all_terminals(other);
        result |= self.insert_all_externals(other);
        result
    }
}
|
||||
|
||||
impl FromIterator<Symbol> for TokenSet {
    /// Build a set by inserting each symbol in turn.
    /// Panics (via `insert`) if the iterator yields a non-terminal.
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        let mut result = Self::new();
        for symbol in iter {
            result.insert(symbol);
        }
        result
    }
}
|
||||
|
||||
impl<'a> ParseItem<'a> {
    /// The augmented start item: dot at position 0 of the synthetic
    /// `START_PRODUCTION`, marked with `variable_index == u32::MAX`.
    pub fn start() -> Self {
        ParseItem {
            variable_index: u32::MAX,
            production: &START_PRODUCTION,
            step_index: 0,
        }
    }

    /// The step at the dot, or `None` if the item is complete.
    pub fn step(&self) -> Option<&'a ProductionStep> {
        self.production.steps.get(self.step_index as usize)
    }

    /// The symbol at the dot, or `None` if the item is complete.
    pub fn symbol(&self) -> Option<Symbol> {
        self.step().map(|step| step.symbol)
    }

    /// Associativity of the step *before* the dot, if any.
    pub fn associativity(&self) -> Option<Associativity> {
        self.prev_step().and_then(|step| step.associativity)
    }

    /// Precedence of the step before the dot; 0 when at the start.
    pub fn precedence(&self) -> i32 {
        self.prev_step().map_or(0, |step| step.precedence)
    }

    /// The step immediately before the dot, or `None` when the dot is at the
    /// beginning of the production.
    pub fn prev_step(&self) -> Option<&'a ProductionStep> {
        if self.step_index > 0 {
            Some(&self.production.steps[self.step_index as usize - 1])
        } else {
            None
        }
    }

    /// True when the dot has passed every step.
    pub fn is_done(&self) -> bool {
        self.step_index as usize == self.production.steps.len()
    }

    /// True for the synthetic augmented start item.
    pub fn is_augmented(&self) -> bool {
        self.variable_index == u32::MAX
    }

    /// The same item with the dot advanced by one step.
    pub fn successor(&self) -> ParseItem<'a> {
        ParseItem {
            variable_index: self.variable_index,
            production: self.production,
            step_index: self.step_index + 1,
        }
    }
}
|
||||
|
||||
impl<'a> ParseItemSet<'a> {
    /// Build a set from (item, lookaheads) pairs, merging lookaheads of
    /// duplicate items.
    pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
        let mut result = Self::default();
        for (item, lookaheads) in elements {
            result.insert(item, &lookaheads);
        }
        result
    }

    /// Insert an item, keeping `entries` sorted by item. If the item is
    /// already present, union in the given lookaheads instead. Returns a
    /// mutable reference to the item's lookahead set so callers can extend
    /// it further.
    pub fn insert(&mut self, item: ParseItem<'a>, lookaheads: &TokenSet) -> &mut TokenSet {
        match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) {
            Err(i) => {
                self.entries.insert(i, (item, lookaheads.clone()));
                &mut self.entries[i].1
            }
            Ok(i) => {
                self.entries[i].1.insert_all(lookaheads);
                &mut self.entries[i].1
            }
        }
    }

    /// Hash only the (variable, step) positions of items whose dot is not at
    /// the end, skipping consecutive duplicates. Used as a cheap signature
    /// for grouping states that may be mergeable.
    pub fn hash_unfinished_items(&self, h: &mut impl Hasher) {
        let mut previous_variable_index = u32::MAX;
        let mut previous_step_index = u32::MAX;
        for (item, _) in self.entries.iter() {
            if item.step().is_some()
                && (item.variable_index != previous_variable_index
                    || item.step_index != previous_step_index)
            {
                h.write_u32(item.variable_index);
                h.write_u32(item.step_index);
                previous_variable_index = item.variable_index;
                previous_step_index = item.step_index;
            }
        }
    }
}
|
||||
|
||||
impl<'a> Default for ParseItemSet<'a> {
    /// An empty item set.
    fn default() -> Self {
        Self {
            entries: Vec::new(),
        }
    }
}
|
||||
|
||||
#[allow(dead_code)]
impl<'a> fmt::Display for ParseItemDisplay<'a> {
    /// Render the item as `variable → steps`, with a `•` marking the dot and
    /// precedence/associativity shown at the dot when non-default.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        if self.0.is_augmented() {
            write!(f, "START →")?;
        } else {
            write!(
                f,
                "{} →",
                &self.1.variables[self.0.variable_index as usize].name
            )?;
        }

        for (i, step) in self.0.production.steps.iter().enumerate() {
            if i == self.0.step_index as usize {
                write!(f, " •")?;
                if step.precedence != 0 || step.associativity.is_some() {
                    write!(
                        f,
                        " (prec {:?} assoc {:?})",
                        step.precedence, step.associativity
                    )?;
                }
            }

            write!(f, " ")?;
            // Resolve the step's symbol to a name: lexical variable,
            // external token, or syntax variable.
            if step.symbol.is_terminal() {
                if let Some(variable) = self.2.variables.get(step.symbol.index) {
                    write!(f, "{}", &variable.name)?;
                } else {
                    write!(f, "{}-{}", "terminal", step.symbol.index)?;
                }
            } else if step.symbol.is_external() {
                write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
            } else {
                write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
            }

            if let Some(alias) = &step.alias {
                write!(f, " (alias {})", alias.value)?;
            }
        }

        // A completed item puts the dot after the last step.
        if self.0.is_done() {
            write!(f, " •")?;
            if let Some(step) = self.0.production.steps.last() {
                if step.precedence != 0 || step.associativity.is_some() {
                    write!(
                        f,
                        " (prec {:?} assoc {:?})",
                        step.precedence, step.associativity
                    )?;
                }
            }
        }

        Ok(())
    }
}
|
||||
|
||||
impl<'a> fmt::Display for TokenSetDisplay<'a> {
    /// Render the set as a comma-separated, bracketed list of token names.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "[")?;
        for (i, symbol) in self.0.iter().enumerate() {
            if i > 0 {
                write!(f, ", ")?;
            }

            // Same name-resolution scheme as ParseItemDisplay.
            if symbol.is_terminal() {
                if let Some(variable) = self.2.variables.get(symbol.index) {
                    write!(f, "{}", &variable.name)?;
                } else {
                    write!(f, "{}-{}", "terminal", symbol.index)?;
                }
            } else if symbol.is_external() {
                write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
            } else {
                write!(f, "{}", &self.1.variables[symbol.index].name)?;
            }
        }
        write!(f, "]")?;
        Ok(())
    }
}
|
||||
|
||||
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
    /// One line per entry: the item, a tab, then its lookahead set.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        for (item, lookaheads) in self.0.entries.iter() {
            writeln!(
                f,
                "{}\t{}",
                ParseItemDisplay(item, self.1, self.2),
                TokenSetDisplay(lookaheads, self.1, self.2)
            )?;
        }
        Ok(())
    }
}
|
||||
|
||||
impl<'a> Hash for ParseItem<'a> {
    /// Hash by production *content*, mirroring the `PartialEq` impl below:
    /// steps before the dot contribute only their alias (the parsed part),
    /// while steps at/after the dot contribute fully.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_u32(self.variable_index);
        hasher.write_u32(self.step_index);
        hasher.write_i32(self.production.dynamic_precedence);
        hasher.write_usize(self.production.steps.len());
        hasher.write_i32(self.precedence());
        self.associativity().hash(hasher);
        for step in &self.production.steps[0..self.step_index as usize] {
            step.alias.hash(hasher);
        }
        for step in &self.production.steps[self.step_index as usize..] {
            step.hash(hasher);
        }
    }
}
|
||||
|
||||
impl<'a> PartialEq for ParseItem<'a> {
    /// Structural equality by production content rather than pointer
    /// identity. Steps already consumed (before the dot) are compared only
    /// by alias; remaining steps are compared in full. Must stay consistent
    /// with the `Hash` and `Ord` impls.
    fn eq(&self, other: &Self) -> bool {
        if self.variable_index != other.variable_index
            || self.step_index != other.step_index
            || self.production.dynamic_precedence != other.production.dynamic_precedence
            || self.production.steps.len() != other.production.steps.len()
            || self.precedence() != other.precedence()
            || self.associativity() != other.associativity()
        {
            return false;
        }

        for (i, step) in self.production.steps.iter().enumerate() {
            if i < self.step_index as usize {
                if step.alias != other.production.steps[i].alias {
                    return false;
                }
            } else {
                if *step != other.production.steps[i] {
                    return false;
                }
            }
        }

        return true;
    }
}
|
||||
|
||||
impl<'a> Ord for ParseItem<'a> {
    /// Lexicographic ordering over the same fields that `PartialEq` compares,
    /// in order: step index, variable index, dynamic precedence, step count,
    /// precedence, associativity, then per-step comparison (alias-only for
    /// consumed steps). `ParseItemSet::insert` relies on this for its
    /// binary search.
    fn cmp(&self, other: &Self) -> Ordering {
        let o = self.step_index.cmp(&other.step_index);
        if o != Ordering::Equal {
            return o;
        }
        let o = self.variable_index.cmp(&other.variable_index);
        if o != Ordering::Equal {
            return o;
        }
        let o = self
            .production
            .dynamic_precedence
            .cmp(&other.production.dynamic_precedence);
        if o != Ordering::Equal {
            return o;
        }
        let o = self
            .production
            .steps
            .len()
            .cmp(&other.production.steps.len());
        if o != Ordering::Equal {
            return o;
        }
        let o = self.precedence().cmp(&other.precedence());
        if o != Ordering::Equal {
            return o;
        }
        let o = self.associativity().cmp(&other.associativity());
        if o != Ordering::Equal {
            return o;
        }
        for (i, step) in self.production.steps.iter().enumerate() {
            let o = if i < self.step_index as usize {
                step.alias.cmp(&other.production.steps[i].alias)
            } else {
                step.cmp(&other.production.steps[i])
            };
            if o != Ordering::Equal {
                return o;
            }
        }
        return Ordering::Equal;
    }
}
|
||||
|
||||
impl<'a> PartialOrd for ParseItem<'a> {
    /// Delegates to the total ordering defined in `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
|
||||
|
||||
// Marker: the content-based `PartialEq` above is a full equivalence relation.
impl<'a> Eq for ParseItem<'a> {}
|
||||
|
||||
impl<'a> Hash for ParseItemSet<'a> {
    /// Hash the entry count and every (item, lookaheads) pair. Entries are
    /// kept sorted by `insert`, so equal sets hash equally.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_usize(self.entries.len());
        for (item, lookaheads) in self.entries.iter() {
            item.hash(hasher);
            lookaheads.hash(hasher);
        }
    }
}
|
||||
354
cli/src/generate/build_tables/item_set_builder.rs
Normal file
354
cli/src/generate/build_tables/item_set_builder.rs
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet, TokenSetDisplay};
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Symbol, SymbolType};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
|
||||
/// One item that must be added to an item set when a given non-terminal
/// appears at a dot, together with the lookahead info for that item.
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
    item: ParseItem<'a>,
    info: FollowSetInfo,
}
|
||||
|
||||
/// Lookahead information attached to a transitive-closure addition.
#[derive(Clone, Debug, PartialEq, Eq)]
struct FollowSetInfo {
    // Tokens that can always follow the item within the expansion.
    lookaheads: TokenSet,
    // Whether the item can occur at the *end* of the expansion, so that the
    // expanded non-terminal's own lookaheads also apply to it.
    propagates_lookaheads: bool,
}
|
||||
|
||||
/// Precomputes FIRST sets, LAST sets, and per-non-terminal transitive-closure
/// additions, then uses them to expand parse item sets efficiently.
pub(crate) struct ParseItemSetBuilder<'a> {
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    // FIRST set per symbol: terminals that can begin that symbol.
    first_sets: HashMap<Symbol, TokenSet>,
    // LAST set per symbol: terminals that can end that symbol.
    last_sets: HashMap<Symbol, TokenSet>,
    inlines: &'a InlinedProductionMap,
    // Indexed by non-terminal: the items (with lookahead info) to add to an
    // item set whenever that non-terminal occurs at a dot.
    transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
}
|
||||
|
||||
/// Append `value` to `vector` unless an equal element is already present.
/// Linear scan on purpose: `T` only needs `Eq`, and the addition lists this
/// is used for stay small.
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
    let already_present = vector.iter().any(|existing| *existing == value);
    if !already_present {
        vector.push(value);
    }
}
|
||||
|
||||
impl<'a> ParseItemSetBuilder<'a> {
    /// Precompute FIRST sets, LAST sets, and transitive-closure additions
    /// for every symbol in the grammar.
    pub fn new(
        syntax_grammar: &'a SyntaxGrammar,
        lexical_grammar: &'a LexicalGrammar,
        inlines: &'a InlinedProductionMap,
    ) -> Self {
        let mut result = Self {
            syntax_grammar,
            lexical_grammar,
            first_sets: HashMap::new(),
            last_sets: HashMap::new(),
            inlines,
            transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
        };

        // For each grammar symbol, populate the FIRST and LAST sets: the set of
        // terminals that appear at the beginning and end that symbol's productions,
        // respectively.
        //
        // For a terminal symbol, the FIRST and LAST set just consists of the
        // terminal itself.
        for i in 0..lexical_grammar.variables.len() {
            let symbol = Symbol::terminal(i);
            let mut set = TokenSet::new();
            set.insert(symbol);
            result.first_sets.insert(symbol, set.clone());
            result.last_sets.insert(symbol, set);
        }

        // External tokens behave like terminals here: they are their own
        // FIRST and LAST sets.
        for i in 0..syntax_grammar.external_tokens.len() {
            let symbol = Symbol::external(i);
            let mut set = TokenSet::new();
            set.insert(symbol);
            result.first_sets.insert(symbol, set.clone());
            result.last_sets.insert(symbol, set);
        }

        // The FIRST set of a non-terminal `i` is the union of the following sets:
        // * the set of all terminals that appear at the beginings of i's productions
        // * the FIRST sets of all the non-terminals that appear at the beginnings
        //   of i's productions
        //
        // Rather than computing these sets using recursion, we use an explicit stack
        // called `symbols_to_process`.
        let mut symbols_to_process = Vec::new();
        let mut processed_non_terminals = HashSet::new();
        for i in 0..syntax_grammar.variables.len() {
            let symbol = Symbol::non_terminal(i);

            let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new());
            processed_non_terminals.clear();
            symbols_to_process.clear();
            symbols_to_process.push(symbol);
            while let Some(current_symbol) = symbols_to_process.pop() {
                if current_symbol.is_terminal() || current_symbol.is_external() {
                    first_set.insert(current_symbol);
                } else if processed_non_terminals.insert(current_symbol) {
                    // Unseen non-terminal: enqueue the first symbol of each
                    // of its productions.
                    for production in syntax_grammar.variables[current_symbol.index]
                        .productions
                        .iter()
                    {
                        if let Some(step) = production.steps.first() {
                            symbols_to_process.push(step.symbol);
                        }
                    }
                }
            }

            // The LAST set is defined in a similar way to the FIRST set,
            // but walking each production's *last* step instead.
            let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new());
            processed_non_terminals.clear();
            symbols_to_process.clear();
            symbols_to_process.push(symbol);
            while let Some(current_symbol) = symbols_to_process.pop() {
                if current_symbol.is_terminal() || current_symbol.is_external() {
                    last_set.insert(current_symbol);
                } else if processed_non_terminals.insert(current_symbol) {
                    for production in syntax_grammar.variables[current_symbol.index]
                        .productions
                        .iter()
                    {
                        if let Some(step) = production.steps.last() {
                            symbols_to_process.push(step.symbol);
                        }
                    }
                }
            }
        }

        // To compute an item set's transitive closure, we find each item in the set
        // whose next symbol is a non-terminal, and we add new items to the set for
        // each of that symbols' productions. These productions might themselves begin
        // with non-terminals, so the process continues recursively. In this process,
        // the total set of entries that get added depends only on two things:
        //   * the set of non-terminal symbols that occur at each item's current position
        //   * the set of terminals that occurs after each of these non-terminal symbols
        //
        // So we can avoid a lot of duplicated recursive work by precomputing, for each
        // non-terminal symbol `i`, a final list of *additions* that must be made to an
        // item set when `i` occurs as the next symbol in one if its core items. The
        // structure of an *addition* is as follows:
        //   * `item` - the new item that must be added as part of the expansion of `i`
        //   * `lookaheads` - lookahead tokens that can always come after that item in
        //     the expansion of `i`
        //   * `propagates_lookaheads` - a boolean indicating whether or not `item` can
        //     occur at the *end* of the expansion of `i`, so that i's own current
        //     lookahead tokens can occur after `item`.
        //
        // Again, rather than computing these additions recursively, we use an explicit
        // stack called `entries_to_process`.
        for i in 0..syntax_grammar.variables.len() {
            let empty_lookaheads = TokenSet::new();
            let mut entries_to_process = vec![(i, &empty_lookaheads, true)];

            // First, build up a map whose keys are all of the non-terminals that can
            // appear at the beginning of non-terminal `i`, and whose values store
            // information about the tokens that can follow each non-terminal.
            let mut follow_set_info_by_non_terminal = HashMap::new();
            while let Some(entry) = entries_to_process.pop() {
                let (variable_index, lookaheads, propagates_lookaheads) = entry;
                let existing_info = follow_set_info_by_non_terminal
                    .entry(variable_index)
                    .or_insert_with(|| FollowSetInfo {
                        lookaheads: TokenSet::new(),
                        propagates_lookaheads: false,
                    });

                // Only re-expand this non-terminal if the entry added new
                // information; this is what guarantees termination.
                let did_add_follow_set_info;
                if propagates_lookaheads {
                    did_add_follow_set_info = !existing_info.propagates_lookaheads;
                    existing_info.propagates_lookaheads = true;
                } else {
                    did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
                }

                if did_add_follow_set_info {
                    for production in &syntax_grammar.variables[variable_index].productions {
                        if let Some(symbol) = production.first_symbol() {
                            if symbol.is_non_terminal() {
                                if production.steps.len() == 1 {
                                    // Single-step production: the inner
                                    // non-terminal inherits this entry's
                                    // lookaheads and propagation flag.
                                    entries_to_process.push((
                                        symbol.index,
                                        lookaheads,
                                        propagates_lookaheads,
                                    ));
                                } else {
                                    // Otherwise the FIRST set of the second
                                    // step follows the inner non-terminal.
                                    entries_to_process.push((
                                        symbol.index,
                                        &result.first_sets[&production.steps[1].symbol],
                                        false,
                                    ));
                                }
                            }
                        }
                    }
                }
            }

            // Store all of those non-terminals' productions, along with their associated
            // lookahead info, as *additions* associated with non-terminal `i`.
            let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
            for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
                let variable = &syntax_grammar.variables[variable_index];
                let non_terminal = Symbol::non_terminal(variable_index);
                let variable_index = variable_index as u32;
                // Variables marked for inlining never appear as items
                // themselves; their productions are substituted instead.
                if syntax_grammar.variables_to_inline.contains(&non_terminal) {
                    continue;
                }
                for production in &variable.productions {
                    let item = ParseItem {
                        variable_index,
                        production,
                        step_index: 0,
                    };

                    if let Some(inlined_productions) =
                        inlines.inlined_productions(item.production, item.step_index)
                    {
                        for production in inlined_productions {
                            find_or_push(
                                additions_for_non_terminal,
                                TransitiveClosureAddition {
                                    item: ParseItem {
                                        variable_index,
                                        production,
                                        step_index: item.step_index,
                                    },
                                    info: follow_set_info.clone(),
                                },
                            );
                        }
                    } else {
                        find_or_push(
                            additions_for_non_terminal,
                            TransitiveClosureAddition {
                                item,
                                info: follow_set_info.clone(),
                            },
                        );
                    }
                }
            }
        }

        result
    }

    /// Expand an item set to its transitive closure, substituting inlined
    /// productions where applicable.
    pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
        let mut result = ParseItemSet::default();
        for (item, lookaheads) in &item_set.entries {
            if let Some(productions) = self
                .inlines
                .inlined_productions(item.production, item.step_index)
            {
                // Replace the item with one item per inlined production.
                for production in productions {
                    self.add_item(
                        &mut result,
                        ParseItem {
                            variable_index: item.variable_index,
                            production,
                            step_index: item.step_index,
                        },
                        lookaheads,
                    );
                }
            } else {
                self.add_item(&mut result, *item, lookaheads);
            }
        }
        result
    }

    /// The precomputed FIRST set for `symbol`. Panics if absent.
    pub fn first_set(&self, symbol: &Symbol) -> &TokenSet {
        &self.first_sets[symbol]
    }

    /// The precomputed LAST set for `symbol`. Panics if absent.
    pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
        &self.last_sets[symbol]
    }

    /// Insert `item` into `set`; if its dot is at a non-terminal, also apply
    /// that non-terminal's precomputed closure additions.
    fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
        if let Some(step) = item.step() {
            if step.symbol.is_non_terminal() {
                let next_step = item.successor().step();

                // Determine which tokens can follow this non-terminal: the
                // FIRST set of the next step, or this item's own lookaheads
                // if the non-terminal is last.
                let following_tokens = if let Some(next_step) = next_step {
                    self.first_sets.get(&next_step.symbol).unwrap()
                } else {
                    &lookaheads
                };

                // Use the pre-computed *additions* to expand the non-terminal.
                for addition in &self.transitive_closure_additions[step.symbol.index] {
                    let lookaheads = set.insert(addition.item, &addition.info.lookaheads);
                    if addition.info.propagates_lookaheads {
                        lookaheads.insert_all(following_tokens);
                    }
                }
            }
        }
        set.insert(item, lookaheads);
    }
}
|
||||
|
||||
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
    /// Dump the FIRST sets, LAST sets, and per-variable closure additions
    /// with symbol indices resolved to names.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ParseItemSetBuilder {{\n")?;

        write!(f, "  first_sets: {{\n")?;
        for (symbol, first_set) in &self.first_sets {
            let name = match symbol.kind {
                SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
                SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
                SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
                SymbolType::End => "END",
            };
            write!(
                f,
                "    first({:?}): {}\n",
                name,
                TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar)
            )?;
        }
        write!(f, "  }}\n")?;

        write!(f, "  last_sets: {{\n")?;
        for (symbol, last_set) in &self.last_sets {
            let name = match symbol.kind {
                SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
                SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
                SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
                SymbolType::End => "END",
            };
            write!(
                f,
                "    last({:?}): {}\n",
                name,
                TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar)
            )?;
        }
        write!(f, "  }}\n")?;

        write!(f, "  additions: {{\n")?;
        for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
            write!(f, "    {}: {{\n", variable.name)?;
            for addition in &self.transitive_closure_additions[i] {
                write!(
                    f,
                    "      {}\n",
                    ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
                )?;
            }
            write!(f, "    }},\n")?;
        }
        write!(f, "  }},")?;

        write!(f, "}}")?;
        Ok(())
    }
}
|
||||
291
cli/src/generate/build_tables/minimize_parse_table.rs
Normal file
291
cli/src/generate/build_tables/minimize_parse_table.rs
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
use super::item::TokenSet;
|
||||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::generate::rules::{AliasMap, Symbol};
|
||||
use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use log::info;
|
||||
|
||||
/// Shrink the parse table in place by (1) bypassing states that only perform
/// unit reductions, (2) merging states that are provably compatible, and
/// (3) dropping states that are no longer reachable.
pub(crate) fn minimize_parse_table(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    simple_aliases: &AliasMap,
    token_conflict_map: &TokenConflictMap,
    keywords: &TokenSet,
) {
    let mut minimizer = Minimizer {
        parse_table,
        syntax_grammar,
        lexical_grammar,
        token_conflict_map,
        keywords,
        simple_aliases,
    };
    // Order matters: unit reductions first so that merging sees the
    // simplified graph, then unused-state removal cleans up.
    minimizer.remove_unit_reductions();
    minimizer.merge_compatible_states();
    minimizer.remove_unused_states();
}
|
||||
|
||||
/// Bundles the parse table with the grammar context needed by the
/// minimization passes in `minimize_parse_table`.
struct Minimizer<'a> {
    parse_table: &'a mut ParseTable,
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    token_conflict_map: &'a TokenConflictMap<'a>,
    keywords: &'a TokenSet,
    simple_aliases: &'a AliasMap,
}
|
||||
|
||||
impl<'a> Minimizer<'a> {
    /// Bypass states whose only job is to perform a single "unit reduction":
    /// reducing exactly one child to a variable that is not named, not aliased,
    /// and not simply-aliased. References to such a state are redirected to the
    /// state that would be reached after the reduction, repeating until no
    /// redirections remain.
    fn remove_unit_reductions(&mut self) {
        // Collect every symbol that is aliased anywhere in the grammar; a unit
        // reduction to an aliased symbol cannot be removed, because the alias
        // would be lost.
        let mut aliased_symbols = HashSet::new();
        for variable in &self.syntax_grammar.variables {
            for production in &variable.productions {
                for step in &production.steps {
                    if step.alias.is_some() {
                        aliased_symbols.insert(step.symbol);
                    }
                }
            }
        }

        // Map each state that performs *only* unit reductions (all to the same
        // symbol, aside from ShiftExtra actions) to that symbol.
        let mut unit_reduction_symbols_by_state = HashMap::new();
        for (i, state) in self.parse_table.states.iter().enumerate() {
            let mut only_unit_reductions = true;
            let mut unit_reduction_symbol = None;
            for (_, entry) in &state.terminal_entries {
                for action in &entry.actions {
                    match action {
                        ParseAction::ShiftExtra => continue,
                        ParseAction::Reduce {
                            child_count: 1,
                            alias_sequence_id: 0,
                            symbol,
                            ..
                        } => {
                            // Eligible only if the reduced symbol is unaliased,
                            // hidden (not Named), and consistent with any unit
                            // reduction seen earlier in this state.
                            if !self.simple_aliases.contains_key(&symbol)
                                && !aliased_symbols.contains(&symbol)
                                && self.syntax_grammar.variables[symbol.index].kind
                                    != VariableType::Named
                                && (unit_reduction_symbol.is_none()
                                    || unit_reduction_symbol == Some(symbol))
                            {
                                unit_reduction_symbol = Some(symbol);
                                continue;
                            }
                        }
                        _ => {}
                    }
                    only_unit_reductions = false;
                    break;
                }

                if !only_unit_reductions {
                    break;
                }
            }

            if let Some(symbol) = unit_reduction_symbol {
                if only_unit_reductions {
                    unit_reduction_symbols_by_state.insert(i, *symbol);
                }
            }
        }

        // Redirect all references to unit-reduction states to their successor
        // (the GOTO on the reduced symbol). Loop until a fixed point, since a
        // redirection may itself target another unit-reduction state.
        for state in self.parse_table.states.iter_mut() {
            let mut done = false;
            while !done {
                done = true;
                state.update_referenced_states(|other_state_id, state| {
                    if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
                        done = false;
                        state.nonterminal_entries[symbol]
                    } else {
                        other_state_id
                    }
                })
            }
        }
    }

    /// Merge pairs of states that behave compatibly. Candidate pairs are
    /// limited to states with the same `unfinished_item_signature`; each
    /// successful merge redirects references from the deleted state to the
    /// surviving one, and the whole process repeats until no more merges occur.
    /// (Deleted states are left in the table here; `remove_unused_states`
    /// physically removes them.)
    fn merge_compatible_states(&mut self) {
        // Group state ids by signature so only plausible pairs are compared.
        let mut state_ids_by_signature = HashMap::new();
        for (i, state) in self.parse_table.states.iter().enumerate() {
            state_ids_by_signature
                .entry(state.unfinished_item_signature)
                .or_insert(Vec::new())
                .push(i);
        }

        let mut deleted_states = HashSet::new();
        loop {
            let mut state_replacements = HashMap::new();
            for (_, state_ids) in &state_ids_by_signature {
                for i in state_ids {
                    for j in state_ids {
                        // Only consider each unordered pair once (j < i).
                        if j == i {
                            break;
                        }
                        if deleted_states.contains(j) || deleted_states.contains(i) {
                            continue;
                        }
                        // Merge the higher-numbered state into the lower one.
                        if self.merge_parse_state(*j, *i) {
                            deleted_states.insert(*i);
                            state_replacements.insert(*i, *j);
                        }
                    }
                }
            }

            if state_replacements.is_empty() {
                break;
            }

            // Repoint every reference to a merged-away state at its survivor.
            for state in self.parse_table.states.iter_mut() {
                state.update_referenced_states(|other_state_id, _| {
                    *state_replacements
                        .get(&other_state_id)
                        .unwrap_or(&other_state_id)
                });
            }
        }
    }

    /// Try to merge state `right` into state `left`. Returns true (and copies
    /// `right`'s extra terminal entries into `left`) when the two states have
    /// identical nonterminal entries and every terminal entry either matches
    /// exactly or can safely be added to the other state.
    fn merge_parse_state(&mut self, left: usize, right: usize) -> bool {
        let left_state = &self.parse_table.states[left];
        let right_state = &self.parse_table.states[right];

        if left_state.nonterminal_entries != right_state.nonterminal_entries {
            return false;
        }

        // Every entry present in `left` must either match `right`'s entry for
        // the same token, or be addable to `right` without changing behavior.
        for (symbol, left_entry) in &left_state.terminal_entries {
            if let Some(right_entry) = right_state.terminal_entries.get(symbol) {
                if right_entry.actions != left_entry.actions {
                    return false;
                }
            } else if !self.can_add_entry_to_state(right_state, *symbol, left_entry) {
                return false;
            }
        }

        // Symmetrically, entries only present in `right` must be addable to
        // `left`; collect them first so the table is not mutated on failure.
        let mut symbols_to_add = Vec::new();
        for (symbol, right_entry) in &right_state.terminal_entries {
            if !left_state.terminal_entries.contains_key(&symbol) {
                if !self.can_add_entry_to_state(left_state, *symbol, right_entry) {
                    return false;
                }
                symbols_to_add.push(*symbol);
            }
        }

        for symbol in symbols_to_add {
            let entry = self.parse_table.states[right].terminal_entries[&symbol].clone();
            self.parse_table.states[left]
                .terminal_entries
                .insert(symbol, entry);
        }

        true
    }

    /// Would adding `entry` for lookahead `token` to `state` preserve the
    /// state's behavior? Used by `merge_parse_state` to decide if two states
    /// are mergeable.
    fn can_add_entry_to_state(
        &self,
        state: &ParseState,
        token: Symbol,
        entry: &ParseTableEntry,
    ) -> bool {
        // Do not add external tokens; they could conflict lexically with any of the state's
        // existing lookahead tokens.
        if token.is_external() {
            return false;
        }

        // Only merge parse states by allowing existing reductions to happen
        // with additional lookahead tokens. Do not alter parse states in ways
        // that allow entirely new types of actions to happen.
        if state.terminal_entries.iter().all(|(_, e)| e != entry) {
            return false;
        }
        match entry.actions.last() {
            Some(ParseAction::Reduce { .. }) => {}
            _ => return false,
        }

        // Do not add tokens which are both internal and external. Their validity could
        // influence the behavior of the external scanner.
        if self
            .syntax_grammar
            .external_tokens
            .iter()
            .any(|t| t.corresponding_internal_token == Some(token))
        {
            return false;
        }

        let is_word_token = self.syntax_grammar.word_token == Some(token);
        let is_keyword = self.keywords.contains(&token);

        // Do not add a token if it conflicts with an existing token.
        // Keyword/word-token pairs are exempt from the conflict check against
        // each other, since keywords are matched via the word token anyway.
        if token.is_terminal() {
            for existing_token in state.terminal_entries.keys() {
                if (is_word_token || is_keyword)
                    && (self.keywords.contains(existing_token)
                        || self.syntax_grammar.word_token.as_ref() == Some(existing_token))
                {
                    continue;
                }
                if self
                    .token_conflict_map
                    .does_conflict(token.index, existing_token.index)
                    || self
                        .token_conflict_map
                        .does_match_same_string(token.index, existing_token.index)
                {
                    info!(
                        "can't merge parse states because of conflict between {} and {}",
                        self.lexical_grammar.variables[token.index].name,
                        self.lexical_grammar.variables[existing_token.index].name
                    );
                    return false;
                }
            }
        }

        true
    }

    /// Physically remove states that are no longer referenced by any other
    /// state, renumbering all remaining state references to match the
    /// compacted table.
    fn remove_unused_states(&mut self) {
        let mut state_usage_map = vec![false; self.parse_table.states.len()];

        // States 0 and 1 are always kept. (State 0 is the error-recovery
        // state; state 1 is presumably the start state — TODO confirm.)
        state_usage_map[0] = true;
        state_usage_map[1] = true;

        for state in &self.parse_table.states {
            for referenced_state in state.referenced_states() {
                state_usage_map[referenced_state] = true;
            }
        }
        // Compute each surviving state's new id: its old id minus the number
        // of removed states that precede it.
        let mut removed_predecessor_count = 0;
        let mut state_replacement_map = vec![0; self.parse_table.states.len()];
        for state_id in 0..self.parse_table.states.len() {
            state_replacement_map[state_id] = state_id - removed_predecessor_count;
            if !state_usage_map[state_id] {
                removed_predecessor_count += 1;
            }
        }
        // Walk the table, dropping unused states and rewriting references in
        // the kept ones. `original_state_id` tracks position in the original
        // numbering; `state_id` tracks position in the compacted table.
        let mut state_id = 0;
        let mut original_state_id = 0;
        while state_id < self.parse_table.states.len() {
            if state_usage_map[original_state_id] {
                self.parse_table.states[state_id].update_referenced_states(|other_state_id, _| {
                    state_replacement_map[other_state_id]
                });
                state_id += 1;
            } else {
                self.parse_table.states.remove(state_id);
            }
            original_state_id += 1;
        }
    }
}
|
||||
350
cli/src/generate/build_tables/mod.rs
Normal file
350
cli/src/generate/build_tables/mod.rs
Normal file
|
|
@ -0,0 +1,350 @@
|
|||
mod build_lex_table;
|
||||
mod build_parse_table;
|
||||
mod coincident_tokens;
|
||||
mod item;
|
||||
mod item_set_builder;
|
||||
mod minimize_parse_table;
|
||||
mod token_conflicts;
|
||||
|
||||
use self::build_lex_table::build_lex_table;
|
||||
use self::build_parse_table::build_parse_table;
|
||||
use self::coincident_tokens::CoincidentTokenIndex;
|
||||
use self::item::TokenSet;
|
||||
use self::minimize_parse_table::minimize_parse_table;
|
||||
use self::token_conflicts::TokenConflictMap;
|
||||
use crate::error::Result;
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use log::info;
|
||||
|
||||
/// Build all of the tables needed by the generated parser: the parse table,
/// the main lex table, the keyword lex table, and the grammar's word token
/// (if it declares one).
///
/// Pipeline:
/// 1. build the raw parse table (plus, for each token, the tokens that may
///    follow it),
/// 2. analyze pairwise lexical conflicts between tokens,
/// 3. identify keyword tokens that can be captured by the word token,
/// 4. populate the error-recovery state,
/// 5. record which symbols are actually used anywhere in the table,
/// 6. mark lookahead entries whose scan results can't be reused,
/// 7. optionally minimize the parse table,
/// 8. build the lex tables for the tokens that remain.
pub(crate) fn build_tables(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    simple_aliases: &AliasMap,
    inlines: &InlinedProductionMap,
    minimize: bool,
    state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
    let (mut parse_table, following_tokens) =
        build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?;
    let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
    let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
    let keywords = identify_keywords(
        lexical_grammar,
        &parse_table,
        syntax_grammar.word_token,
        &token_conflict_map,
        &coincident_token_index,
    );
    populate_error_state(
        &mut parse_table,
        syntax_grammar,
        lexical_grammar,
        &coincident_token_index,
        &token_conflict_map,
        &keywords,
    );
    populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar);
    mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
    if minimize {
        minimize_parse_table(
            &mut parse_table,
            syntax_grammar,
            lexical_grammar,
            simple_aliases,
            &token_conflict_map,
            &keywords,
        );
    }
    // The lex tables are built last so that they only cover the tokens that
    // remain valid after minimization.
    let (main_lex_table, keyword_lex_table) = build_lex_table(
        &mut parse_table,
        syntax_grammar,
        lexical_grammar,
        &keywords,
        &coincident_token_index,
        &token_conflict_map,
        minimize,
    );
    Ok((
        parse_table,
        main_lex_table,
        keyword_lex_table,
        syntax_grammar.word_token,
    ))
}
|
||||
|
||||
/// Populate the error-recovery state (state 0 of the parse table) with
/// `Recover` entries for every token that can safely be recognized during
/// error recovery, plus end-of-input and any purely-external tokens.
fn populate_error_state(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    coincident_token_index: &CoincidentTokenIndex,
    token_conflict_map: &TokenConflictMap,
    keywords: &TokenSet,
) {
    // State 0 serves as the error-recovery state.
    let state = &mut parse_table.states[0];
    let n = lexical_grammar.variables.len();

    // First identify the *conflict-free tokens*: tokens that do not overlap with
    // any other token in any way, besides matching exactly the same string.
    let conflict_free_tokens: TokenSet = (0..n)
        .into_iter()
        .filter_map(|i| {
            let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
                j != i
                    && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
                    && token_conflict_map.does_match_shorter_or_longer(i, j)
            });
            if conflicts_with_other_tokens {
                None
            } else {
                info!(
                    "error recovery - token {} has no conflicts",
                    lexical_grammar.variables[i].name
                );
                Some(Symbol::terminal(i))
            }
        })
        .collect();

    let recover_entry = ParseTableEntry {
        reusable: false,
        actions: vec![ParseAction::Recover],
    };

    // Exclude from the error-recovery state any token that conflicts with one of
    // the *conflict-free tokens* identified above. Keywords and the word token
    // are exempt from exclusion.
    for i in 0..n {
        let symbol = Symbol::terminal(i);
        if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) {
            if syntax_grammar.word_token != Some(symbol) {
                if let Some(t) = conflict_free_tokens.iter().find(|t| {
                    !coincident_token_index.contains(symbol, *t)
                        && token_conflict_map.does_conflict(symbol.index, t.index)
                }) {
                    info!(
                        "error recovery - exclude token {} because of conflict with {}",
                        lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
                    );
                    continue;
                }
            }
        }
        info!(
            "error recovery - include token {}",
            lexical_grammar.variables[i].name
        );
        // Don't overwrite an entry the state may already have for this token.
        state
            .terminal_entries
            .entry(symbol)
            .or_insert_with(|| recover_entry.clone());
    }

    // External tokens with no internal counterpart are always recoverable.
    for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() {
        if external_token.corresponding_internal_token.is_none() {
            state
                .terminal_entries
                .entry(Symbol::external(i))
                .or_insert_with(|| recover_entry.clone());
        }
    }

    // End-of-input always triggers recovery in the error state.
    state.terminal_entries.insert(Symbol::end(), recover_entry);
}
|
||||
|
||||
fn populate_used_symbols(
|
||||
parse_table: &mut ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) {
|
||||
let mut terminal_usages = vec![false; lexical_grammar.variables.len()];
|
||||
let mut non_terminal_usages = vec![false; syntax_grammar.variables.len()];
|
||||
let mut external_usages = vec![false; syntax_grammar.external_tokens.len()];
|
||||
for state in &parse_table.states {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
match symbol.kind {
|
||||
SymbolType::Terminal => terminal_usages[symbol.index] = true,
|
||||
SymbolType::External => external_usages[symbol.index] = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
for symbol in state.nonterminal_entries.keys() {
|
||||
non_terminal_usages[symbol.index] = true;
|
||||
}
|
||||
}
|
||||
parse_table.symbols.push(Symbol::end());
|
||||
for (i, value) in terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
// Assign the grammar's word token a low numerical index. This ensures that
|
||||
// it can be stored in a subtree with no heap allocations, even for grammars with
|
||||
// very large numbers of tokens. This is an optimization, but it's also important to
|
||||
// ensure that a subtree's symbol can be successfully reassigned to the word token
|
||||
// without having to move the subtree to the heap.
|
||||
// See https://github.com/tree-sitter/tree-sitter/issues/258
|
||||
if syntax_grammar.word_token.map_or(false, |t| t.index == i) {
|
||||
parse_table.symbols.insert(1, Symbol::terminal(i));
|
||||
} else {
|
||||
parse_table.symbols.push(Symbol::terminal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i, value) in external_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
parse_table.symbols.push(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in non_terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
parse_table.symbols.push(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Identify which tokens can be treated as *keywords*: tokens that can be
/// lexed via the grammar's word token and later re-classified, instead of
/// being matched directly by the main lexer. Returns the empty set when the
/// grammar declares no word token.
fn identify_keywords(
    lexical_grammar: &LexicalGrammar,
    parse_table: &ParseTable,
    word_token: Option<Symbol>,
    token_conflict_map: &TokenConflictMap,
    coincident_token_index: &CoincidentTokenIndex,
) -> TokenSet {
    if word_token.is_none() {
        return TokenSet::new();
    }

    let word_token = word_token.unwrap();
    let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());

    // First find all of the candidate keyword tokens: tokens that start with
    // letters or underscore and can match the same string as a word token.
    let keyword_candidates: TokenSet = lexical_grammar
        .variables
        .iter()
        .enumerate()
        .filter_map(|(i, variable)| {
            cursor.reset(vec![variable.start_state]);
            if all_chars_are_alphabetical(&cursor)
                && token_conflict_map.does_match_same_string(i, word_token.index)
            {
                info!(
                    "Keywords - add candidate {}",
                    lexical_grammar.variables[i].name
                );
                Some(Symbol::terminal(i))
            } else {
                None
            }
        })
        .collect();

    // Exclude keyword candidates that shadow another keyword candidate.
    let keywords: TokenSet = keyword_candidates
        .iter()
        .filter(|token| {
            for other_token in keyword_candidates.iter() {
                if other_token != *token
                    && token_conflict_map.does_match_same_string(other_token.index, token.index)
                {
                    info!(
                        "Keywords - exclude {} because it matches the same string as {}",
                        lexical_grammar.variables[token.index].name,
                        lexical_grammar.variables[other_token.index].name
                    );
                    return false;
                }
            }
            true
        })
        .collect();

    // Exclude keyword candidates for which substituting the keyword capture
    // token would introduce new lexical conflicts with other tokens.
    let keywords = keywords
        .iter()
        .filter(|token| {
            for other_index in 0..lexical_grammar.variables.len() {
                // Only non-candidate tokens can introduce new conflicts.
                if keyword_candidates.contains(&Symbol::terminal(other_index)) {
                    continue;
                }

                // If the word token was already valid in every state containing
                // this keyword candidate, then substituting the word token won't
                // introduce any new lexical conflicts.
                if coincident_token_index
                    .states_with(*token, Symbol::terminal(other_index))
                    .iter()
                    .all(|state_id| {
                        parse_table.states[*state_id]
                            .terminal_entries
                            .contains_key(&word_token)
                    })
                {
                    continue;
                }

                // The substitution is only safe if the word token relates to
                // this other token exactly the way the keyword itself does.
                if !token_conflict_map.has_same_conflict_status(
                    token.index,
                    word_token.index,
                    other_index,
                ) {
                    info!(
                        "Keywords - exclude {} because of conflict with {}",
                        lexical_grammar.variables[token.index].name,
                        lexical_grammar.variables[other_index].name
                    );
                    return false;
                }
            }

            info!(
                "Keywords - include {}",
                lexical_grammar.variables[token.index].name,
            );
            true
        })
        .collect();

    keywords
}
|
||||
|
||||
fn mark_fragile_tokens(
|
||||
parse_table: &mut ParseTable,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
token_conflict_map: &TokenConflictMap,
|
||||
) {
|
||||
let n = lexical_grammar.variables.len();
|
||||
let mut valid_tokens_mask = Vec::with_capacity(n);
|
||||
for state in parse_table.states.iter_mut() {
|
||||
valid_tokens_mask.clear();
|
||||
valid_tokens_mask.resize(n, false);
|
||||
for token in state.terminal_entries.keys() {
|
||||
if token.is_terminal() {
|
||||
valid_tokens_mask[token.index] = true;
|
||||
}
|
||||
}
|
||||
for (token, entry) in state.terminal_entries.iter_mut() {
|
||||
if token.is_terminal() {
|
||||
for i in 0..n {
|
||||
if token_conflict_map.does_overlap(i, token.index) {
|
||||
if valid_tokens_mask[i] {
|
||||
entry.reusable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
|
||||
cursor.transition_chars().all(|(chars, is_sep)| {
|
||||
if is_sep {
|
||||
true
|
||||
} else if let CharacterSet::Include(chars) = chars {
|
||||
chars.iter().all(|c| c.is_alphabetic() || *c == '_')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
}
|
||||
486
cli/src/generate/build_tables/token_conflicts.rs
Normal file
486
cli/src/generate/build_tables/token_conflicts.rs
Normal file
|
|
@ -0,0 +1,486 @@
|
|||
use crate::generate::build_tables::item::{TokenSet, TokenSetDisplay};
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
|
||||
use hashbrown::HashSet;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
||||
/// The lexical relationship of one token to another, as determined by
/// simulating both tokens' NFAs over the same input (see
/// `compute_conflict_status`). The default (all false) means "no conflict".
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct TokenConflictStatus {
    /// This token can win out over the other token on some input — either at
    /// a point where both have matched the same string, or by continuing to
    /// scan past the other token's match.
    does_overlap: bool,
    /// This token can keep scanning past the other token's match using
    /// characters that may validly follow the other token.
    does_match_valid_continuation: bool,
    /// This token can match a string at a point where separator transitions
    /// are also possible in the combined NFA state.
    does_match_separators: bool,
    /// This token is preferred when both tokens match exactly the same string.
    matches_same_string: bool,
}
|
||||
|
||||
/// A precomputed table of pairwise lexical relationships between all of a
/// grammar's tokens, used throughout table building to decide which tokens
/// can safely coexist as lookaheads.
pub(crate) struct TokenConflictMap<'a> {
    /// Number of tokens (rows/columns of the conflict matrix).
    n: usize,
    /// Row-major `n * n` matrix; entry `(i, j)` describes token `i` relative
    /// to token `j` (see `matrix_index`).
    status_matrix: Vec<TokenConflictStatus>,
    /// For each token, the set of tokens that may appear immediately after it.
    following_tokens: Vec<TokenSet>,
    /// For each token, the characters with which its NFA can begin.
    starting_chars_by_index: Vec<CharacterSet>,
    /// For each token, the starting characters of all tokens that may follow it.
    following_chars_by_index: Vec<CharacterSet>,
    grammar: &'a LexicalGrammar,
}
|
||||
|
||||
impl<'a> TokenConflictMap<'a> {
    /// Build the conflict matrix for `grammar` by simulating every unordered
    /// pair of tokens together. `following_tokens[i]` must give the tokens
    /// that may appear immediately after token `i`.
    pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
        let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
        let starting_chars = get_starting_chars(&mut cursor, grammar);
        let following_chars = get_following_chars(&starting_chars, &following_tokens);

        let n = grammar.variables.len();
        let mut status_matrix = vec![TokenConflictStatus::default(); n * n];
        // Only pairs with j < i are simulated; each simulation yields both
        // directions of the relationship. Diagonal entries stay at default.
        for i in 0..grammar.variables.len() {
            for j in 0..i {
                let status = compute_conflict_status(&mut cursor, grammar, &following_chars, i, j);
                status_matrix[matrix_index(n, i, j)] = status.0;
                status_matrix[matrix_index(n, j, i)] = status.1;
            }
        }

        TokenConflictMap {
            n,
            status_matrix,
            following_tokens,
            starting_chars_by_index: starting_chars,
            following_chars_by_index: following_chars,
            grammar,
        }
    }

    /// Do tokens `a` and `b` relate to token `other` in exactly the same way?
    pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
        let left = &self.status_matrix[matrix_index(self.n, a, other)];
        let right = &self.status_matrix[matrix_index(self.n, b, other)];
        left == right
    }

    /// Is token `i` preferred when tokens `i` and `j` match the same string?
    pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
        self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
    }

    /// Can token `i` interfere with recognizing token `j`?
    pub fn does_conflict(&self, i: usize, j: usize) -> bool {
        let entry = &self.status_matrix[matrix_index(self.n, i, j)];
        entry.does_match_valid_continuation
            || entry.does_match_separators
            || entry.matches_same_string
    }

    /// Can token `i` match a shorter or longer string than token `j` in a way
    /// that token `j` cannot reciprocate via separators?
    pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
        let entry = &self.status_matrix[matrix_index(self.n, i, j)];
        let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
        (entry.does_match_valid_continuation || entry.does_match_separators)
            && !reverse_entry.does_match_separators
    }

    /// Can token `i` win out over token `j` on some input?
    pub fn does_overlap(&self, i: usize, j: usize) -> bool {
        self.status_matrix[matrix_index(self.n, i, j)].does_overlap
    }

    /// Tie-break two completed tokens, each given as (precedence, token id):
    /// higher explicit precedence wins, then higher implicit precedence, then
    /// the token listed earlier in the grammar.
    pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
        if left.0 > right.0 {
            return true;
        } else if left.0 < right.0 {
            return false;
        }

        match grammar.variables[left.1]
            .implicit_precedence
            .cmp(&grammar.variables[right.1].implicit_precedence)
        {
            Ordering::Less => false,
            Ordering::Greater => true,
            Ordering::Equal => left.1 < right.1,
        }
    }

    /// Decide whether transition `t` should be taken in preference to stopping
    /// at an already-completed token (`completed_id` with
    /// `completed_precedence`).
    pub fn prefer_transition(
        grammar: &LexicalGrammar,
        t: &NfaTransition,
        completed_id: usize,
        completed_precedence: i32,
        has_separator_transitions: bool,
    ) -> bool {
        if t.precedence < completed_precedence {
            return false;
        }
        if t.precedence == completed_precedence {
            // At equal precedence, never continue through a separator, and —
            // when separators are possible — only continue if the completed
            // token itself can still advance via this transition.
            if t.is_separator {
                return false;
            }
            if has_separator_transitions
                && grammar
                    .variable_indices_for_nfa_states(&t.states)
                    .position(|i| i == completed_id)
                    .is_none()
            {
                return false;
            }
        }
        true
    }
}
|
||||
|
||||
impl<'a> fmt::Debug for TokenConflictMap<'a> {
    /// Multi-line dump of the entire map — following tokens, starting and
    /// following characters, and the full status matrix — keyed by token name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "TokenConflictMap {{\n")?;

        // A default SyntaxGrammar is enough here: it is only needed to satisfy
        // TokenSetDisplay, and following_tokens contains terminals only.
        let syntax_grammar = SyntaxGrammar::default();

        write!(f, "  following_tokens: {{\n")?;
        for (i, following_tokens) in self.following_tokens.iter().enumerate() {
            write!(
                f,
                "    follow({:?}): {},\n",
                self.grammar.variables[i].name,
                TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar)
            )?;
        }
        write!(f, "  }},\n")?;

        write!(f, "  starting_characters: {{\n")?;
        for i in 0..self.n {
            write!(
                f,
                "    {:?}: {:?},\n",
                self.grammar.variables[i].name, self.starting_chars_by_index[i]
            )?;
        }
        write!(f, "  }},\n")?;

        write!(f, "  following_characters: {{\n")?;
        for i in 0..self.n {
            write!(
                f,
                "    {:?}: {:?},\n",
                self.grammar.variables[i].name, self.following_chars_by_index[i]
            )?;
        }
        write!(f, "  }},\n")?;

        write!(f, "  status_matrix: {{\n")?;
        for i in 0..self.n {
            write!(f, "    {:?}: {{\n", self.grammar.variables[i].name)?;
            for j in 0..self.n {
                write!(
                    f,
                    "      {:?}: {:?},\n",
                    self.grammar.variables[j].name,
                    self.status_matrix[matrix_index(self.n, i, j)]
                )?;
            }
            write!(f, "    }},\n")?;
        }
        write!(f, "  }},")?;
        write!(f, "}}")?;
        Ok(())
    }
}
|
||||
|
||||
/// Map a (row, column) pair into the flat `status_matrix` vector, which
/// stores a `variable_count` x `variable_count` matrix in row-major order.
fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
    i * variable_count + j
}
|
||||
|
||||
fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<CharacterSet> {
|
||||
let mut result = Vec::with_capacity(grammar.variables.len());
|
||||
for variable in &grammar.variables {
|
||||
cursor.reset(vec![variable.start_state]);
|
||||
let mut all_chars = CharacterSet::empty();
|
||||
for (chars, _) in cursor.transition_chars() {
|
||||
all_chars = all_chars.add(chars);
|
||||
}
|
||||
result.push(all_chars);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn get_following_chars(
|
||||
starting_chars: &Vec<CharacterSet>,
|
||||
following_tokens: &Vec<TokenSet>,
|
||||
) -> Vec<CharacterSet> {
|
||||
following_tokens
|
||||
.iter()
|
||||
.map(|following_tokens| {
|
||||
let mut chars = CharacterSet::empty();
|
||||
for token in following_tokens.iter() {
|
||||
if token.is_terminal() {
|
||||
chars = chars.add(&starting_chars[token.index]);
|
||||
}
|
||||
}
|
||||
chars
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Simulate tokens `i` and `j` together by walking the product of their NFAs
/// breadth-first, recording how each token can conflict with the other.
/// Returns `(status of i relative to j, status of j relative to i)`.
fn compute_conflict_status(
    cursor: &mut NfaCursor,
    grammar: &LexicalGrammar,
    following_chars: &Vec<CharacterSet>,
    i: usize,
    j: usize,
) -> (TokenConflictStatus, TokenConflictStatus) {
    let mut visited_state_sets = HashSet::new();
    // Start with both tokens' NFA start states combined.
    let mut state_set_queue = vec![vec![
        grammar.variables[i].start_state,
        grammar.variables[j].start_state,
    ]];
    let mut result = (
        TokenConflictStatus::default(),
        TokenConflictStatus::default(),
    );

    while let Some(state_set) = state_set_queue.pop() {
        // Don't pursue states where there's no potential for conflict.
        if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
            cursor.reset(state_set);
        } else {
            continue;
        }

        // Can separator characters be consumed from this combined state?
        let has_sep = cursor.transition_chars().any(|(_, sep)| sep);

        // Walk the tokens that are *completed* in this state, keeping track of
        // the currently-preferred completion in `completion`.
        let mut completion = None;
        for (id, precedence) in cursor.completions() {
            if has_sep {
                if id == i {
                    result.0.does_match_separators = true;
                } else {
                    result.1.does_match_separators = true;
                }
            }

            if let Some((prev_id, prev_precedence)) = completion {
                if id == prev_id {
                    continue;
                }

                // Prefer tokens with higher precedence. For tokens with equal precedence,
                // prefer those listed earlier in the grammar.
                let preferred_id;
                if TokenConflictMap::prefer_token(
                    grammar,
                    (prev_precedence, prev_id),
                    (precedence, id),
                ) {
                    preferred_id = prev_id;
                } else {
                    preferred_id = id;
                    completion = Some((id, precedence));
                }

                // Both tokens matched the same string; the preferred one wins.
                if preferred_id == i {
                    result.0.matches_same_string = true;
                    result.0.does_overlap = true;
                } else {
                    result.1.matches_same_string = true;
                    result.1.does_overlap = true;
                }
            } else {
                completion = Some((id, precedence));
            }
        }

        // Follow each outgoing transition; if one token has already completed,
        // check whether the other token continuing constitutes a conflict.
        for transition in cursor.transitions() {
            let mut can_advance = true;
            if let Some((completed_id, completed_precedence)) = completion {
                // Does the successor state still involve the completed token,
                // and/or some other token?
                let mut other_id = None;
                let mut successor_contains_completed_id = false;
                for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) {
                    if variable_id == completed_id {
                        successor_contains_completed_id = true;
                        break;
                    } else {
                        other_id = Some(variable_id);
                    }
                }

                if let (Some(other_id), false) = (other_id, successor_contains_completed_id) {
                    let preferred_id = if TokenConflictMap::prefer_transition(
                        grammar,
                        &transition,
                        completed_id,
                        completed_precedence,
                        has_sep,
                    ) {
                        can_advance = true;
                        other_id
                    } else {
                        completed_id
                    };

                    if preferred_id == i {
                        result.0.does_overlap = true;
                        // Continuing only matters if the next character could
                        // validly follow the other token.
                        if transition.characters.does_intersect(&following_chars[j]) {
                            result.0.does_match_valid_continuation = true;
                        }
                    } else {
                        result.1.does_overlap = true;
                        if transition.characters.does_intersect(&following_chars[i]) {
                            result.1.does_match_valid_continuation = true;
                        }
                    }
                }
            }

            // Enqueue each combined state set at most once.
            if can_advance && visited_state_sets.insert(transition.states.clone()) {
                state_set_queue.push(transition.states);
            }
        }
    }
    result
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{Variable, VariableType};
|
||||
use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
|
||||
use crate::generate::rules::{Rule, Symbol};
|
||||
|
||||
#[test]
|
||||
fn test_starting_characters() {
|
||||
let grammar = expand_tokens(ExtractedLexicalGrammar {
|
||||
separators: Vec::new(),
|
||||
variables: vec![
|
||||
Variable {
|
||||
name: "token_0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::pattern("[a-f]1|0x\\d"),
|
||||
},
|
||||
Variable {
|
||||
name: "token_1".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::pattern("d*ef"),
|
||||
},
|
||||
],
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let token_map = TokenConflictMap::new(&grammar, Vec::new());
|
||||
|
||||
assert_eq!(
|
||||
token_map.starting_chars_by_index[0],
|
||||
CharacterSet::empty().add_range('a', 'f').add_char('0')
|
||||
);
|
||||
assert_eq!(
|
||||
token_map.starting_chars_by_index[1],
|
||||
CharacterSet::empty().add_range('d', 'e')
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_token_conflicts() {
|
||||
let grammar = expand_tokens(ExtractedLexicalGrammar {
|
||||
separators: Vec::new(),
|
||||
variables: vec![
|
||||
Variable {
|
||||
name: "in".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("in"),
|
||||
},
|
||||
Variable {
|
||||
name: "identifier".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::pattern("\\w+"),
|
||||
},
|
||||
Variable {
|
||||
name: "instanceof".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("instanceof"),
|
||||
},
|
||||
],
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let var = |name| index_of_var(&grammar, name);
|
||||
|
||||
let token_map = TokenConflictMap::new(
|
||||
&grammar,
|
||||
vec![
|
||||
[Symbol::terminal(var("identifier"))]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
[Symbol::terminal(var("in"))].iter().cloned().collect(),
|
||||
[Symbol::terminal(var("identifier"))]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
],
|
||||
);
|
||||
|
||||
// Given the string "in", the `in` token is preferred over the `identifier` token
|
||||
assert!(token_map.does_match_same_string(var("in"), var("identifier")));
|
||||
assert!(!token_map.does_match_same_string(var("identifier"), var("in")));
|
||||
|
||||
// Depending on what character follows, the string "in" may be treated as part of an
|
||||
// `identifier` token.
|
||||
assert!(token_map.does_conflict(var("identifier"), var("in")));
|
||||
|
||||
// Depending on what character follows, the string "instanceof" may be treated as part of
|
||||
// an `identifier` token.
|
||||
assert!(token_map.does_conflict(var("identifier"), var("instanceof")));
|
||||
assert!(token_map.does_conflict(var("instanceof"), var("in")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_token_conflicts_with_separators() {
|
||||
let grammar = expand_tokens(ExtractedLexicalGrammar {
|
||||
separators: vec![Rule::pattern("\\s")],
|
||||
variables: vec![
|
||||
Variable {
|
||||
name: "x".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("x"),
|
||||
},
|
||||
Variable {
|
||||
name: "newline".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("\n"),
|
||||
},
|
||||
],
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let var = |name| index_of_var(&grammar, name);
|
||||
|
||||
let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
|
||||
|
||||
assert!(token_map.does_conflict(var("newline"), var("x")));
|
||||
assert!(!token_map.does_conflict(var("x"), var("newline")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_token_conflicts_with_open_ended_tokens() {
|
||||
let grammar = expand_tokens(ExtractedLexicalGrammar {
|
||||
separators: vec![Rule::pattern("\\s")],
|
||||
variables: vec![
|
||||
Variable {
|
||||
name: "x".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::string("x"),
|
||||
},
|
||||
Variable {
|
||||
name: "anything".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec(-1, Rule::pattern(".*")),
|
||||
},
|
||||
],
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let var = |name| index_of_var(&grammar, name);
|
||||
|
||||
let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);
|
||||
|
||||
assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x")));
|
||||
assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything")));
|
||||
}
|
||||
|
||||
fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
|
||||
grammar
|
||||
.variables
|
||||
.iter()
|
||||
.position(|v| v.name == name)
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
331
cli/src/generate/dsl.js
Normal file
331
cli/src/generate/dsl.js
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
const UNICODE_ESCAPE_PATTERN = /\\u([0-9a-f]{4})/gi;
|
||||
|
||||
function alias(rule, value) {
|
||||
const result = {
|
||||
type: "ALIAS",
|
||||
content: normalize(rule),
|
||||
named: false,
|
||||
value: null
|
||||
};
|
||||
|
||||
switch (value.constructor) {
|
||||
case String:
|
||||
result.named = false;
|
||||
result.value = value;
|
||||
return result;
|
||||
case ReferenceError:
|
||||
result.named = true;
|
||||
result.value = value.symbol.name;
|
||||
return result;
|
||||
case Object:
|
||||
if (typeof value.type === 'string' && value.type === 'SYMBOL') {
|
||||
result.named = true;
|
||||
result.value = value.name;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Invalid alias value ' + value);
|
||||
}
|
||||
|
||||
function blank() {
|
||||
return {
|
||||
type: "BLANK"
|
||||
};
|
||||
}
|
||||
|
||||
function choice(...elements) {
|
||||
return {
|
||||
type: "CHOICE",
|
||||
members: elements.map(normalize)
|
||||
};
|
||||
}
|
||||
|
||||
function optional(value) {
|
||||
return choice(value, blank());
|
||||
}
|
||||
|
||||
function prec(number, rule) {
|
||||
if (rule == null) {
|
||||
rule = number;
|
||||
number = 0;
|
||||
}
|
||||
|
||||
return {
|
||||
type: "PREC",
|
||||
value: number,
|
||||
content: normalize(rule)
|
||||
};
|
||||
}
|
||||
|
||||
prec.left = function(number, rule) {
|
||||
if (rule == null) {
|
||||
rule = number;
|
||||
number = 0;
|
||||
}
|
||||
|
||||
return {
|
||||
type: "PREC_LEFT",
|
||||
value: number,
|
||||
content: normalize(rule)
|
||||
};
|
||||
}
|
||||
|
||||
prec.right = function(number, rule) {
|
||||
if (rule == null) {
|
||||
rule = number;
|
||||
number = 0;
|
||||
}
|
||||
|
||||
return {
|
||||
type: "PREC_RIGHT",
|
||||
value: number,
|
||||
content: normalize(rule)
|
||||
};
|
||||
}
|
||||
|
||||
prec.dynamic = function(number, rule) {
|
||||
return {
|
||||
type: "PREC_DYNAMIC",
|
||||
value: number,
|
||||
content: normalize(rule)
|
||||
};
|
||||
}
|
||||
|
||||
function repeat(rule) {
|
||||
return {
|
||||
type: "REPEAT",
|
||||
content: normalize(rule)
|
||||
};
|
||||
}
|
||||
|
||||
function repeat1(rule) {
|
||||
return {
|
||||
type: "REPEAT1",
|
||||
content: normalize(rule)
|
||||
};
|
||||
}
|
||||
|
||||
function seq(...elements) {
|
||||
return {
|
||||
type: "SEQ",
|
||||
members: elements.map(normalize)
|
||||
};
|
||||
}
|
||||
|
||||
function sym(name) {
|
||||
return {
|
||||
type: "SYMBOL",
|
||||
name: name
|
||||
};
|
||||
}
|
||||
|
||||
function token(value) {
|
||||
return {
|
||||
type: "TOKEN",
|
||||
content: normalize(value)
|
||||
};
|
||||
}
|
||||
|
||||
token.immediate = function(value) {
|
||||
return {
|
||||
type: "IMMEDIATE_TOKEN",
|
||||
content: normalize(value)
|
||||
};
|
||||
}
|
||||
|
||||
function normalize(value) {
|
||||
if (typeof value == "undefined")
|
||||
throw new Error("Undefined symbol");
|
||||
|
||||
switch (value.constructor) {
|
||||
case String:
|
||||
return {
|
||||
type: 'STRING',
|
||||
value
|
||||
};
|
||||
case RegExp:
|
||||
return {
|
||||
type: 'PATTERN',
|
||||
value: value.source
|
||||
.replace(
|
||||
UNICODE_ESCAPE_PATTERN,
|
||||
(match, group) => String.fromCharCode(parseInt(group, 16))
|
||||
)
|
||||
};
|
||||
case ReferenceError:
|
||||
throw value
|
||||
default:
|
||||
if (typeof value.type === 'string') {
|
||||
return value;
|
||||
} else {
|
||||
throw new TypeError("Invalid rule: " + value.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function RuleBuilder(ruleMap) {
|
||||
return new Proxy({}, {
|
||||
get(target, propertyName) {
|
||||
const symbol = {
|
||||
type: 'SYMBOL',
|
||||
name: propertyName
|
||||
};
|
||||
|
||||
if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) {
|
||||
return symbol;
|
||||
} else {
|
||||
const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
|
||||
error.symbol = symbol;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
function grammar(baseGrammar, options) {
|
||||
if (!options) {
|
||||
options = baseGrammar;
|
||||
baseGrammar = {
|
||||
name: null,
|
||||
rules: {},
|
||||
extras: [normalize(/\s/)],
|
||||
conflicts: [],
|
||||
externals: [],
|
||||
inline: []
|
||||
};
|
||||
}
|
||||
|
||||
let externals = baseGrammar.externals;
|
||||
if (options.externals) {
|
||||
if (typeof options.externals !== "function") {
|
||||
throw new Error("Grammar's 'externals' property must be a function.");
|
||||
}
|
||||
|
||||
const externalsRuleBuilder = RuleBuilder(null)
|
||||
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
|
||||
|
||||
if (!Array.isArray(externalRules)) {
|
||||
throw new Error("Grammar's 'externals' property must return an array of rules.");
|
||||
}
|
||||
|
||||
externals = externalRules.map(normalize);
|
||||
}
|
||||
|
||||
const ruleMap = {};
|
||||
for (const key in options.rules) {
|
||||
ruleMap[key] = true;
|
||||
}
|
||||
for (const key in baseGrammar.rules) {
|
||||
ruleMap[key] = true;
|
||||
}
|
||||
for (const external of externals) {
|
||||
if (typeof external.name === 'string') {
|
||||
ruleMap[external.name] = true;
|
||||
}
|
||||
}
|
||||
|
||||
const ruleBuilder = RuleBuilder(ruleMap);
|
||||
|
||||
const name = options.name;
|
||||
if (typeof name !== "string") {
|
||||
throw new Error("Grammar's 'name' property must be a string.");
|
||||
}
|
||||
|
||||
if (!/^[a-zA-Z_]\w*$/.test(name)) {
|
||||
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
|
||||
}
|
||||
|
||||
let rules = Object.assign({}, baseGrammar.rules);
|
||||
if (options.rules) {
|
||||
if (typeof options.rules !== "object") {
|
||||
throw new Error("Grammar's 'rules' property must be an object.");
|
||||
}
|
||||
|
||||
for (const ruleName in options.rules) {
|
||||
const ruleFn = options.rules[ruleName];
|
||||
if (typeof ruleFn !== "function") {
|
||||
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
|
||||
}
|
||||
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
|
||||
}
|
||||
}
|
||||
|
||||
let extras = baseGrammar.extras.slice();
|
||||
if (options.extras) {
|
||||
if (typeof options.extras !== "function") {
|
||||
throw new Error("Grammar's 'extras' property must be a function.");
|
||||
}
|
||||
|
||||
extras = options.extras
|
||||
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
|
||||
.map(normalize);
|
||||
}
|
||||
|
||||
let word = baseGrammar.word;
|
||||
if (options.word) {
|
||||
word = options.word.call(ruleBuilder, ruleBuilder).name;
|
||||
if (typeof word != 'string') {
|
||||
throw new Error("Grammar's 'word' property must be a named rule.");
|
||||
}
|
||||
}
|
||||
|
||||
let conflicts = baseGrammar.conflicts;
|
||||
if (options.conflicts) {
|
||||
if (typeof options.conflicts !== "function") {
|
||||
throw new Error("Grammar's 'conflicts' property must be a function.");
|
||||
}
|
||||
|
||||
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
|
||||
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
|
||||
|
||||
if (!Array.isArray(conflictRules)) {
|
||||
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
|
||||
}
|
||||
|
||||
conflicts = conflictRules.map(conflictSet => {
|
||||
if (!Array.isArray(conflictSet)) {
|
||||
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
|
||||
}
|
||||
|
||||
return conflictSet.map(symbol => normalize(symbol).name);
|
||||
});
|
||||
}
|
||||
|
||||
let inline = baseGrammar.inline;
|
||||
if (options.inline) {
|
||||
if (typeof options.inline !== "function") {
|
||||
throw new Error("Grammar's 'inline' property must be a function.");
|
||||
}
|
||||
|
||||
const baseInlineRules = baseGrammar.inline.map(sym);
|
||||
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
|
||||
|
||||
if (!Array.isArray(inlineRules)) {
|
||||
throw new Error("Grammar's inline must be an array of rules.");
|
||||
}
|
||||
|
||||
inline = inlineRules.map(symbol => symbol.name);
|
||||
}
|
||||
|
||||
if (Object.keys(rules).length == 0) {
|
||||
throw new Error("Grammar must have at least one rule.");
|
||||
}
|
||||
|
||||
return {name, word, rules, extras, conflicts, externals, inline};
|
||||
}
|
||||
|
||||
global.alias = alias;
|
||||
global.blank = blank;
|
||||
global.choice = choice;
|
||||
global.optional = optional;
|
||||
global.prec = prec;
|
||||
global.repeat = repeat;
|
||||
global.repeat1 = repeat1;
|
||||
global.seq = seq;
|
||||
global.sym = sym;
|
||||
global.token = token;
|
||||
global.grammar = grammar;
|
||||
|
||||
const result = require(process.env.TREE_SITTER_GRAMMAR_PATH);
|
||||
console.log(JSON.stringify(result, null, 2));
|
||||
223
cli/src/generate/grammars.rs
Normal file
223
cli/src/generate/grammars.rs
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
use super::nfa::Nfa;
|
||||
use super::rules::{Alias, Associativity, Rule, Symbol};
|
||||
use hashbrown::HashMap;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum VariableType {
|
||||
Hidden,
|
||||
Auxiliary,
|
||||
Anonymous,
|
||||
Named,
|
||||
}
|
||||
|
||||
// Input grammar
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct Variable {
|
||||
pub name: String,
|
||||
pub kind: VariableType,
|
||||
pub rule: Rule,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InputGrammar {
|
||||
pub name: String,
|
||||
pub variables: Vec<Variable>,
|
||||
pub extra_tokens: Vec<Rule>,
|
||||
pub expected_conflicts: Vec<Vec<String>>,
|
||||
pub external_tokens: Vec<Rule>,
|
||||
pub variables_to_inline: Vec<String>,
|
||||
pub word_token: Option<String>,
|
||||
}
|
||||
|
||||
// Extracted lexical grammar
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct LexicalVariable {
|
||||
pub name: String,
|
||||
pub kind: VariableType,
|
||||
pub implicit_precedence: i32,
|
||||
pub start_state: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct LexicalGrammar {
|
||||
pub nfa: Nfa,
|
||||
pub variables: Vec<LexicalVariable>,
|
||||
}
|
||||
|
||||
// Extracted syntax grammar
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub(crate) struct ProductionStep {
|
||||
pub symbol: Symbol,
|
||||
pub precedence: i32,
|
||||
pub associativity: Option<Associativity>,
|
||||
pub alias: Option<Alias>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct Production {
|
||||
pub steps: Vec<ProductionStep>,
|
||||
pub dynamic_precedence: i32,
|
||||
}
|
||||
|
||||
pub(crate) struct InlinedProductionMap {
|
||||
pub productions: Vec<Production>,
|
||||
pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct SyntaxVariable {
|
||||
pub name: String,
|
||||
pub kind: VariableType,
|
||||
pub productions: Vec<Production>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ExternalToken {
|
||||
pub name: String,
|
||||
pub kind: VariableType,
|
||||
pub corresponding_internal_token: Option<Symbol>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct SyntaxGrammar {
|
||||
pub variables: Vec<SyntaxVariable>,
|
||||
pub extra_tokens: Vec<Symbol>,
|
||||
pub expected_conflicts: Vec<Vec<Symbol>>,
|
||||
pub external_tokens: Vec<ExternalToken>,
|
||||
pub variables_to_inline: Vec<Symbol>,
|
||||
pub word_token: Option<Symbol>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl ProductionStep {
|
||||
pub(crate) fn new(symbol: Symbol) -> Self {
|
||||
Self {
|
||||
symbol,
|
||||
precedence: 0,
|
||||
associativity: None,
|
||||
alias: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn with_prec(self, precedence: i32, associativity: Option<Associativity>) -> Self {
|
||||
Self {
|
||||
symbol: self.symbol,
|
||||
precedence,
|
||||
associativity,
|
||||
alias: self.alias,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self {
|
||||
Self {
|
||||
symbol: self.symbol,
|
||||
precedence: self.precedence,
|
||||
associativity: self.associativity,
|
||||
alias: Some(Alias {
|
||||
value: value.to_string(),
|
||||
is_named,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Production {
|
||||
pub fn first_symbol(&self) -> Option<Symbol> {
|
||||
self.steps.first().map(|s| s.symbol.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Production {
|
||||
fn default() -> Self {
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl Variable {
|
||||
pub fn named(name: &str, rule: Rule) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn auxiliary(name: &str, rule: Rule) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
kind: VariableType::Auxiliary,
|
||||
rule,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hidden(name: &str, rule: Rule) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
rule,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn anonymous(name: &str, rule: Rule) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
kind: VariableType::Anonymous,
|
||||
rule,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LexicalGrammar {
|
||||
pub fn variable_indices_for_nfa_states<'a>(
|
||||
&'a self,
|
||||
state_ids: &'a Vec<u32>,
|
||||
) -> impl Iterator<Item = usize> + 'a {
|
||||
let mut prev = None;
|
||||
state_ids.iter().filter_map(move |state_id| {
|
||||
let variable_id = self.variable_index_for_nfa_state(*state_id);
|
||||
if prev != Some(variable_id) {
|
||||
prev = Some(variable_id);
|
||||
prev
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
|
||||
self.variables
|
||||
.iter()
|
||||
.position(|v| v.start_state >= state_id)
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl SyntaxVariable {
|
||||
pub fn is_auxiliary(&self) -> bool {
|
||||
self.kind == VariableType::Auxiliary
|
||||
}
|
||||
}
|
||||
|
||||
impl InlinedProductionMap {
|
||||
pub fn inlined_productions<'a>(
|
||||
&'a self,
|
||||
production: &Production,
|
||||
step_index: u32,
|
||||
) -> Option<impl Iterator<Item = &'a Production> + 'a> {
|
||||
self.production_map
|
||||
.get(&(production as *const Production, step_index))
|
||||
.map(|production_indices| {
|
||||
production_indices
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(move |index| &self.productions[index])
|
||||
})
|
||||
}
|
||||
}
|
||||
142
cli/src/generate/mod.rs
Normal file
142
cli/src/generate/mod.rs
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
use self::build_tables::build_tables;
|
||||
use self::parse_grammar::parse_grammar;
|
||||
use self::prepare_grammar::prepare_grammar;
|
||||
use self::render::render_c_code;
|
||||
use crate::error::{Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
mod build_tables;
|
||||
mod grammars;
|
||||
mod nfa;
|
||||
mod npm_files;
|
||||
mod parse_grammar;
|
||||
mod prepare_grammar;
|
||||
mod render;
|
||||
mod rules;
|
||||
mod tables;
|
||||
|
||||
lazy_static! {
|
||||
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_parser_in_directory(
|
||||
repo_path: &PathBuf,
|
||||
grammar_path: Option<&str>,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<()> {
|
||||
let grammar_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
|
||||
let grammar_json = load_grammar_file(&grammar_path)?;
|
||||
let (language_name, c_code) =
|
||||
generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
|
||||
let repo_src_path = repo_path.join("src");
|
||||
let repo_header_path = repo_src_path.join("tree_sitter");
|
||||
fs::create_dir_all(&repo_src_path)?;
|
||||
fs::create_dir_all(&repo_header_path)?;
|
||||
fs::write(&repo_src_path.join("parser.c"), c_code)
|
||||
.map_err(|e| format!("Failed to write parser.c: {}", e))?;
|
||||
ensure_file(&repo_src_path.join("binding.cc"), || {
|
||||
npm_files::binding_cc(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_path.join("binding.gyp"), || {
|
||||
npm_files::binding_gyp(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_path.join("index.js"), || {
|
||||
npm_files::index_js(&language_name)
|
||||
})?;
|
||||
ensure_file(&repo_header_path.join("parser.h"), || {
|
||||
include_str!("../../../lib/include/tree_sitter/parser.h")
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
|
||||
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
|
||||
generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())
|
||||
}
|
||||
|
||||
fn generate_parser_for_grammar_with_opts(
|
||||
grammar_json: &str,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(String, String)> {
|
||||
let input_grammar = parse_grammar(grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&inlines,
|
||||
minimize,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
let c_code = render_c_code(
|
||||
&input_grammar.name,
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
keyword_lex_table,
|
||||
keyword_capture_token,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
);
|
||||
Ok((input_grammar.name, c_code))
|
||||
}
|
||||
|
||||
fn load_grammar_file(grammar_path: &PathBuf) -> Result<String> {
|
||||
match grammar_path.extension().and_then(|e| e.to_str()) {
|
||||
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
|
||||
Some("json") => Ok(fs::read_to_string(grammar_path)?),
|
||||
_ => Err(Error(format!(
|
||||
"Unknown grammar file extension: {:?}",
|
||||
grammar_path
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn load_js_grammar_file(grammar_path: &PathBuf) -> Result<String> {
|
||||
let mut node_process = Command::new("node")
|
||||
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("Failed to run `node`");
|
||||
|
||||
let mut node_stdin = node_process
|
||||
.stdin
|
||||
.take()
|
||||
.expect("Failed to open stdin for node");
|
||||
let javascript_code = include_bytes!("./dsl.js");
|
||||
node_stdin
|
||||
.write(javascript_code)
|
||||
.expect("Failed to write to node's stdin");
|
||||
drop(node_stdin);
|
||||
let output = node_process
|
||||
.wait_with_output()
|
||||
.expect("Failed to read output from node");
|
||||
match output.status.code() {
|
||||
None => panic!("Node process was killed"),
|
||||
Some(0) => {}
|
||||
Some(code) => return Err(Error(format!("Node process exited with status {}", code))),
|
||||
}
|
||||
|
||||
Ok(String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"))
|
||||
}
|
||||
|
||||
fn ensure_file<T: AsRef<[u8]>>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> {
|
||||
if path.exists() {
|
||||
Ok(())
|
||||
} else {
|
||||
fs::write(path, f().as_ref())
|
||||
.map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e)))
|
||||
}
|
||||
}
|
||||
768
cli/src/generate/nfa.rs
Normal file
768
cli/src/generate/nfa.rs
Normal file
|
|
@ -0,0 +1,768 @@
|
|||
use std::char;
|
||||
use std::cmp::max;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
use std::mem::swap;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum CharacterSet {
|
||||
Include(Vec<char>),
|
||||
Exclude(Vec<char>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum NfaState {
|
||||
Advance {
|
||||
chars: CharacterSet,
|
||||
state_id: u32,
|
||||
is_sep: bool,
|
||||
precedence: i32,
|
||||
},
|
||||
Split(u32, u32),
|
||||
Accept {
|
||||
variable_index: usize,
|
||||
precedence: i32,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub struct Nfa {
|
||||
pub states: Vec<NfaState>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NfaCursor<'a> {
|
||||
pub(crate) state_ids: Vec<u32>,
|
||||
nfa: &'a Nfa,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct NfaTransition {
|
||||
pub characters: CharacterSet,
|
||||
pub is_separator: bool,
|
||||
pub precedence: i32,
|
||||
pub states: Vec<u32>,
|
||||
}
|
||||
|
||||
impl Default for Nfa {
|
||||
fn default() -> Self {
|
||||
Self { states: Vec::new() }
|
||||
}
|
||||
}
|
||||
|
||||
impl CharacterSet {
|
||||
pub fn empty() -> Self {
|
||||
CharacterSet::Include(Vec::new())
|
||||
}
|
||||
|
||||
pub fn negate(self) -> CharacterSet {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => CharacterSet::Exclude(chars),
|
||||
CharacterSet::Exclude(chars) => CharacterSet::Include(chars),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_char(self, c: char) -> Self {
|
||||
if let CharacterSet::Include(mut chars) = self {
|
||||
if let Err(i) = chars.binary_search(&c) {
|
||||
chars.insert(i, c);
|
||||
}
|
||||
CharacterSet::Include(chars)
|
||||
} else {
|
||||
panic!("Called add with a negated character set");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_range(self, start: char, end: char) -> Self {
|
||||
if let CharacterSet::Include(mut chars) = self {
|
||||
let mut c = start as u32;
|
||||
while c <= end as u32 {
|
||||
chars.push(char::from_u32(c).unwrap());
|
||||
c += 1;
|
||||
}
|
||||
chars.sort_unstable();
|
||||
chars.dedup();
|
||||
CharacterSet::Include(chars)
|
||||
} else {
|
||||
panic!("Called add with a negated character set");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(self, other: &CharacterSet) -> Self {
|
||||
match self {
|
||||
CharacterSet::Include(mut chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
chars.extend(other_chars);
|
||||
chars.sort_unstable();
|
||||
chars.dedup();
|
||||
CharacterSet::Include(chars)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let excluded_chars = other_chars
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|c| !chars.contains(&c))
|
||||
.collect();
|
||||
CharacterSet::Exclude(excluded_chars)
|
||||
}
|
||||
},
|
||||
CharacterSet::Exclude(mut chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
chars.retain(|c| !other_chars.contains(&c));
|
||||
CharacterSet::Exclude(chars)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
chars.retain(|c| other_chars.contains(&c));
|
||||
CharacterSet::Exclude(chars)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn does_intersect(&self, other: &CharacterSet) -> bool {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).common,
|
||||
CharacterSet::Exclude(other_chars) => compare_chars(chars, other_chars).left_only,
|
||||
},
|
||||
CharacterSet::Exclude(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).right_only,
|
||||
CharacterSet::Exclude(_) => true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
CharacterSet::Include(remove_chars(chars, other_chars, true))
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let mut removed = remove_chars(chars, other_chars, false);
|
||||
add_chars(other_chars, chars);
|
||||
swap(&mut removed, chars);
|
||||
CharacterSet::Include(removed)
|
||||
}
|
||||
},
|
||||
CharacterSet::Exclude(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
let mut removed = remove_chars(other_chars, chars, false);
|
||||
add_chars(chars, other_chars);
|
||||
swap(&mut removed, other_chars);
|
||||
CharacterSet::Include(removed)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let mut result_exclusion = chars.clone();
|
||||
result_exclusion.extend(other_chars.iter().cloned());
|
||||
result_exclusion.sort_unstable();
|
||||
result_exclusion.dedup();
|
||||
remove_chars(chars, other_chars, true);
|
||||
let mut included_characters = Vec::new();
|
||||
let mut other_included_characters = Vec::new();
|
||||
swap(&mut included_characters, other_chars);
|
||||
swap(&mut other_included_characters, chars);
|
||||
*self = CharacterSet::Include(included_characters);
|
||||
*other = CharacterSet::Include(other_included_characters);
|
||||
CharacterSet::Exclude(result_exclusion)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
if let CharacterSet::Include(c) = self {
|
||||
c.is_empty()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn contains(&self, c: char) -> bool {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => chars.contains(&c),
|
||||
CharacterSet::Exclude(chars) => !chars.contains(&c),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for CharacterSet {
|
||||
fn cmp(&self, other: &CharacterSet) -> Ordering {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => {
|
||||
if let CharacterSet::Include(other_chars) = other {
|
||||
order_chars(chars, other_chars)
|
||||
} else {
|
||||
Ordering::Less
|
||||
}
|
||||
}
|
||||
CharacterSet::Exclude(chars) => {
|
||||
if let CharacterSet::Exclude(other_chars) = other {
|
||||
order_chars(chars, other_chars)
|
||||
} else {
|
||||
Ordering::Greater
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for CharacterSet {
|
||||
fn partial_cmp(&self, other: &CharacterSet) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
fn add_chars(left: &mut Vec<char>, right: &Vec<char>) {
|
||||
for c in right {
|
||||
match left.binary_search(c) {
|
||||
Err(i) => left.insert(i, *c),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_chars(left: &mut Vec<char>, right: &mut Vec<char>, mutate_right: bool) -> Vec<char> {
|
||||
let mut result = Vec::new();
|
||||
right.retain(|right_char| {
|
||||
if let Some(index) = left.iter().position(|left_char| *left_char == *right_char) {
|
||||
left.remove(index);
|
||||
result.push(*right_char);
|
||||
false || !mutate_right
|
||||
} else {
|
||||
true
|
||||
}
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
struct SetComparision {
|
||||
left_only: bool,
|
||||
common: bool,
|
||||
right_only: bool,
|
||||
}
|
||||
|
||||
fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparision {
|
||||
let mut result = SetComparision {
|
||||
left_only: false,
|
||||
common: false,
|
||||
right_only: false,
|
||||
};
|
||||
let mut left = left.iter().cloned();
|
||||
let mut right = right.iter().cloned();
|
||||
let mut i = left.next();
|
||||
let mut j = right.next();
|
||||
while let (Some(left_char), Some(right_char)) = (i, j) {
|
||||
if left_char < right_char {
|
||||
i = left.next();
|
||||
result.left_only = true;
|
||||
} else if left_char > right_char {
|
||||
j = right.next();
|
||||
result.right_only = true;
|
||||
} else {
|
||||
i = left.next();
|
||||
j = right.next();
|
||||
result.common = true;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn order_chars(chars: &Vec<char>, other_chars: &Vec<char>) -> Ordering {
|
||||
if chars.is_empty() {
|
||||
if other_chars.is_empty() {
|
||||
Ordering::Equal
|
||||
} else {
|
||||
Ordering::Less
|
||||
}
|
||||
} else if other_chars.is_empty() {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
let cmp = chars.len().cmp(&other_chars.len());
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
for (c, other_c) in chars.iter().zip(other_chars.iter()) {
|
||||
let cmp = c.cmp(other_c);
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl Nfa {
|
||||
pub fn new() -> Self {
|
||||
Nfa { states: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn last_state_id(&self) -> u32 {
|
||||
self.states.len() as u32 - 1
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Nfa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "Nfa {{ states: {{\n")?;
|
||||
for (i, state) in self.states.iter().enumerate() {
|
||||
write!(f, " {}: {:?},\n", i, state)?;
|
||||
}
|
||||
write!(f, "}} }}")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> NfaCursor<'a> {
    /// Create a cursor positioned at the given NFA states. Split states are
    /// expanded (see `add_states`), so `state_ids` holds only concrete states.
    pub fn new(nfa: &'a Nfa, mut states: Vec<u32>) -> Self {
        let mut result = Self {
            nfa,
            state_ids: Vec::new(),
        };
        result.add_states(&mut states);
        result
    }

    /// Reposition the cursor at a new set of states, expanding splits.
    pub fn reset(&mut self, mut states: Vec<u32>) {
        self.state_ids.clear();
        self.add_states(&mut states);
    }

    /// Replace the state list verbatim, WITHOUT expanding split states.
    pub fn force_reset(&mut self, states: Vec<u32>) {
        self.state_ids = states
    }

    /// Iterate over `(character set, is_separator)` for every `Advance`
    /// transition out of the current states.
    pub fn transition_chars(&self) -> impl Iterator<Item = (&CharacterSet, bool)> {
        self.raw_transitions().map(|t| (t.0, t.1))
    }

    /// All outgoing transitions, grouped so their character sets are disjoint.
    pub fn transitions(&self) -> Vec<NfaTransition> {
        Self::group_transitions(self.raw_transitions())
    }

    /// `(chars, is_separator, precedence, target state)` for each current
    /// state that is an `Advance`; other state kinds are skipped.
    fn raw_transitions(&self) -> impl Iterator<Item = (&CharacterSet, bool, i32, u32)> {
        self.state_ids.iter().filter_map(move |id| {
            if let NfaState::Advance {
                chars,
                state_id,
                precedence,
                is_sep,
            } = &self.nfa.states[*id as usize]
            {
                Some((chars, *is_sep, *precedence, *state_id))
            } else {
                None
            }
        })
    }

    /// Combine raw transitions whose character sets overlap into transitions
    /// with *disjoint* character sets. Where two transitions intersect, the
    /// merged entry takes the max precedence, the AND of the separator flags,
    /// and the sorted union of the target states. The result is sorted by
    /// character set.
    fn group_transitions<'b>(
        iter: impl Iterator<Item = (&'b CharacterSet, bool, i32, u32)>,
    ) -> Vec<NfaTransition> {
        let mut result: Vec<NfaTransition> = Vec::new();
        for (chars, is_sep, prec, state) in iter {
            let mut chars = chars.clone();
            let mut i = 0;
            // Carve the new character set against each existing entry until
            // nothing of it remains or all entries have been checked.
            while i < result.len() && !chars.is_empty() {
                // Removes the overlap from BOTH `result[i].characters` and `chars`.
                let intersection = result[i].characters.remove_intersection(&mut chars);
                if !intersection.is_empty() {
                    let mut intersection_states = result[i].states.clone();
                    // Keep the target-state list sorted and deduplicated.
                    match intersection_states.binary_search(&state) {
                        Err(j) => intersection_states.insert(j, state),
                        _ => {}
                    }
                    let intersection_transition = NfaTransition {
                        characters: intersection,
                        is_separator: result[i].is_separator && is_sep,
                        precedence: max(result[i].precedence, prec),
                        states: intersection_states,
                    };
                    if result[i].characters.is_empty() {
                        // The old entry was fully consumed; replace it in place.
                        result[i] = intersection_transition;
                    } else {
                        // Keep the remainder of the old entry; insert the merged
                        // part before it and skip past the insertion.
                        result.insert(i, intersection_transition);
                        i += 1;
                    }
                }
                i += 1;
            }
            // Whatever part of `chars` intersected nothing becomes its own entry.
            if !chars.is_empty() {
                result.push(NfaTransition {
                    characters: chars,
                    precedence: prec,
                    states: vec![state],
                    is_separator: is_sep,
                });
            }
        }
        result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
        result
    }

    /// `(variable_index, precedence)` for each current state that is an
    /// `Accept` state, i.e. each token that can complete here.
    pub fn completions(&self) -> impl Iterator<Item = (usize, i32)> + '_ {
        self.state_ids.iter().filter_map(move |state_id| {
            if let NfaState::Accept {
                variable_index,
                precedence,
            } = self.nfa.states[*state_id as usize]
            {
                Some((variable_index, precedence))
            } else {
                None
            }
        })
    }

    /// Add states to the cursor. `Split` states are expanded transitively
    /// (their two branches are appended to the work list instead of being
    /// recorded); non-split states are inserted into `state_ids` keeping it
    /// sorted and deduplicated.
    pub fn add_states(&mut self, new_state_ids: &mut Vec<u32>) {
        let mut i = 0;
        while i < new_state_ids.len() {
            let state_id = new_state_ids[i];
            let state = &self.nfa.states[state_id as usize];
            if let NfaState::Split(left, right) = state {
                // Linear membership scan keeps the work list free of
                // duplicates so split cycles cannot loop forever.
                let mut has_left = false;
                let mut has_right = false;
                for new_state_id in new_state_ids.iter() {
                    if *new_state_id == *left {
                        has_left = true;
                    }
                    if *new_state_id == *right {
                        has_right = true;
                    }
                }
                if !has_left {
                    new_state_ids.push(*left);
                }
                if !has_right {
                    new_state_ids.push(*right);
                }
            } else if let Err(i) = self.state_ids.binary_search(&state_id) {
                // `i` here shadows the loop counter: it is the insertion
                // index that keeps `state_ids` sorted.
                self.state_ids.insert(i, state_id);
            }
            i += 1;
        }
    }
}
|
||||
|
||||
// Unit tests for transition grouping and CharacterSet set algebra.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_group_transitions() {
        // Each row is (raw transitions, expected grouped transitions).
        let table = [
            // overlapping character classes
            (
                vec![
                    (CharacterSet::empty().add_range('a', 'f'), false, 0, 1),
                    (CharacterSet::empty().add_range('d', 'i'), false, 1, 2),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_range('a', 'c'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_range('d', 'f'),
                        is_separator: false,
                        precedence: 1,
                        states: vec![1, 2],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_range('g', 'i'),
                        is_separator: false,
                        precedence: 1,
                        states: vec![2],
                    },
                ],
            ),
            // large character class followed by many individual characters
            (
                vec![
                    (CharacterSet::empty().add_range('a', 'z'), false, 0, 1),
                    (CharacterSet::empty().add_char('d'), false, 0, 2),
                    (CharacterSet::empty().add_char('i'), false, 0, 3),
                    (CharacterSet::empty().add_char('f'), false, 0, 4),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('d'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1, 2],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('f'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1, 4],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('i'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1, 3],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty()
                            .add_range('a', 'c')
                            .add_char('e')
                            .add_range('g', 'h')
                            .add_range('j', 'z'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1],
                    },
                ],
            ),
            // negated character class followed by an individual character
            (
                vec![
                    (CharacterSet::empty().add_char('0'), false, 0, 1),
                    (CharacterSet::empty().add_char('b'), false, 0, 2),
                    (
                        CharacterSet::empty().add_range('a', 'f').negate(),
                        false,
                        0,
                        3,
                    ),
                    (CharacterSet::empty().add_char('c'), false, 0, 4),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('0'),
                        precedence: 0,
                        states: vec![1, 3],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('b'),
                        precedence: 0,
                        states: vec![2],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('c'),
                        precedence: 0,
                        states: vec![4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty()
                            .add_range('a', 'f')
                            .add_char('0')
                            .negate(),
                        precedence: 0,
                        states: vec![3],
                        is_separator: false,
                    },
                ],
            ),
            // multiple negated character classes
            (
                vec![
                    (CharacterSet::Include(vec!['a']), false, 0, 1),
                    (CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2),
                    (CharacterSet::Include(vec!['g']), false, 0, 6),
                    (CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3),
                    (CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4),
                    (CharacterSet::Include(vec!['g']), false, 0, 5),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::Include(vec!['a']),
                        precedence: 0,
                        states: vec![1, 3, 4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::Include(vec!['g']),
                        precedence: 0,
                        states: vec![2, 3, 5, 6],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::Include(vec!['b', 'c']),
                        precedence: 0,
                        states: vec![3, 4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::Include(vec!['h', 'i']),
                        precedence: 0,
                        states: vec![2, 3],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::Include(vec!['d', 'e', 'f']),
                        precedence: 0,
                        states: vec![2, 4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::Exclude(vec![
                            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
                        ]),
                        precedence: 0,
                        states: vec![2, 3, 4],
                        is_separator: false,
                    },
                ],
            ),
        ];

        for row in table.iter() {
            assert_eq!(
                NfaCursor::group_transitions(row.0.iter().map(|(c, sep, p, s)| (c, *sep, *p, *s))),
                row.1
            );
        }
    }

    #[test]
    fn test_character_set_remove_intersection() {
        // A whitelist and an overlapping whitelist.
        // Both sets contain 'c', 'd', and 'f'
        let mut a = CharacterSet::empty().add_range('a', 'f');
        let mut b = CharacterSet::empty().add_range('c', 'h');
        assert_eq!(
            a.remove_intersection(&mut b),
            CharacterSet::empty().add_range('c', 'f')
        );
        assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
        assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));

        // Same pair, called in the opposite direction — result is symmetric.
        let mut a = CharacterSet::empty().add_range('a', 'f');
        let mut b = CharacterSet::empty().add_range('c', 'h');
        assert_eq!(
            b.remove_intersection(&mut a),
            CharacterSet::empty().add_range('c', 'f')
        );
        assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
        assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));

        // A whitelist and a larger whitelist.
        let mut a = CharacterSet::empty().add_char('c');
        let mut b = CharacterSet::empty().add_range('a', 'e');
        assert_eq!(
            a.remove_intersection(&mut b),
            CharacterSet::empty().add_char('c')
        );
        assert_eq!(a, CharacterSet::empty());
        assert_eq!(
            b,
            CharacterSet::empty()
                .add_range('a', 'b')
                .add_range('d', 'e')
        );

        let mut a = CharacterSet::empty().add_char('c');
        let mut b = CharacterSet::empty().add_range('a', 'e');
        assert_eq!(
            b.remove_intersection(&mut a),
            CharacterSet::empty().add_char('c')
        );
        assert_eq!(a, CharacterSet::empty());
        assert_eq!(
            b,
            CharacterSet::empty()
                .add_range('a', 'b')
                .add_range('d', 'e')
        );

        // A whitelist and an intersecting blacklist.
        // Both sets contain 'e', 'f', and 'm'
        let mut a = CharacterSet::empty()
            .add_range('c', 'h')
            .add_range('k', 'm');
        let mut b = CharacterSet::empty()
            .add_range('a', 'd')
            .add_range('g', 'l')
            .negate();
        assert_eq!(
            a.remove_intersection(&mut b),
            CharacterSet::Include(vec!['e', 'f', 'm'])
        );
        assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
        assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());

        let mut a = CharacterSet::empty()
            .add_range('c', 'h')
            .add_range('k', 'm');
        let mut b = CharacterSet::empty()
            .add_range('a', 'd')
            .add_range('g', 'l')
            .negate();
        assert_eq!(
            b.remove_intersection(&mut a),
            CharacterSet::Include(vec!['e', 'f', 'm'])
        );
        assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
        assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());

        // A blacklist and an overlapping blacklist.
        // Both sets exclude 'c', 'd', and 'e'
        let mut a = CharacterSet::empty().add_range('a', 'e').negate();
        let mut b = CharacterSet::empty().add_range('c', 'h').negate();
        assert_eq!(
            a.remove_intersection(&mut b),
            CharacterSet::empty().add_range('a', 'h').negate(),
        );
        assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
        assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));

        // A blacklist and a larger blacklist.
        let mut a = CharacterSet::empty().add_range('b', 'c').negate();
        let mut b = CharacterSet::empty().add_range('a', 'd').negate();
        assert_eq!(
            a.remove_intersection(&mut b),
            CharacterSet::empty().add_range('a', 'd').negate(),
        );
        assert_eq!(a, CharacterSet::empty().add_char('a').add_char('d'));
        assert_eq!(b, CharacterSet::empty());
    }

    #[test]
    fn test_character_set_does_intersect() {
        let (a, b) = (CharacterSet::empty(), CharacterSet::empty());
        assert!(!a.does_intersect(&b));
        assert!(!b.does_intersect(&a));

        let (a, b) = (
            CharacterSet::empty().add_char('a'),
            CharacterSet::empty().add_char('a'),
        );
        assert!(a.does_intersect(&b));
        assert!(b.does_intersect(&a));

        let (a, b) = (
            CharacterSet::empty().add_char('b'),
            CharacterSet::empty().add_char('a').add_char('c'),
        );
        assert!(!a.does_intersect(&b));
        assert!(!b.does_intersect(&a));

        let (a, b) = (
            CharacterSet::Include(vec!['b']),
            CharacterSet::Exclude(vec!['a', 'b', 'c']),
        );
        assert!(!a.does_intersect(&b));
        assert!(!b.does_intersect(&a));

        let (a, b) = (
            CharacterSet::Include(vec!['b']),
            CharacterSet::Exclude(vec!['a', 'c']),
        );
        assert!(a.does_intersect(&b));
        assert!(b.does_intersect(&a));

        // Two blacklists always share the characters outside their union.
        let (a, b) = (
            CharacterSet::Exclude(vec!['a']),
            CharacterSet::Exclude(vec!['a']),
        );
        assert!(a.does_intersect(&b));
        assert!(b.does_intersect(&a));
    }
}
|
||||
18
cli/src/generate/npm_files.rs
Normal file
18
cli/src/generate/npm_files.rs
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
use std::str;
|
||||
|
||||
// Templates for the files generated in a parser's npm package; the
// placeholder below is substituted with the parser's name.
const BINDING_CC_TEMPLATE: &'static str = include_str!("./templates/binding.cc");
const BINDING_GYP_TEMPLATE: &'static str = include_str!("./templates/binding.gyp");
const INDEX_JS_TEMPLATE: &'static str = include_str!("./templates/index.js");
// Token replaced in each template by the concrete parser name.
const PARSER_NAME_PLACEHOLDER: &'static str = "PARSER_NAME";
|
||||
|
||||
pub fn binding_cc(parser_name: &str) -> String {
|
||||
BINDING_CC_TEMPLATE.replace(PARSER_NAME_PLACEHOLDER, parser_name)
|
||||
}
|
||||
|
||||
pub fn binding_gyp(parser_name: &str) -> String {
|
||||
BINDING_GYP_TEMPLATE.replace(PARSER_NAME_PLACEHOLDER, parser_name)
|
||||
}
|
||||
|
||||
pub fn index_js(parser_name: &str) -> String {
|
||||
INDEX_JS_TEMPLATE.replace(PARSER_NAME_PLACEHOLDER, parser_name)
|
||||
}
|
||||
182
cli/src/generate/parse_grammar.rs
Normal file
182
cli/src/generate/parse_grammar.rs
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
use super::grammars::{InputGrammar, Variable, VariableType};
|
||||
use super::rules::Rule;
|
||||
use crate::error::Result;
|
||||
use serde_derive::Deserialize;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
/// JSON representation of a single rule in `grammar.json`.
/// The `"type"` field of each JSON object selects the variant
/// (serde internally-tagged enum).
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
enum RuleJSON {
    // Relabels the node produced by `content` as `value` (named or anonymous).
    ALIAS {
        content: Box<RuleJSON>,
        named: bool,
        value: String,
    },
    BLANK,
    STRING {
        value: String,
    },
    PATTERN {
        value: String,
    },
    SYMBOL {
        name: String,
    },
    CHOICE {
        members: Vec<RuleJSON>,
    },
    SEQ {
        members: Vec<RuleJSON>,
    },
    // Zero-or-more repetitions of `content`.
    REPEAT {
        content: Box<RuleJSON>,
    },
    // One-or-more repetitions of `content`.
    REPEAT1 {
        content: Box<RuleJSON>,
    },
    PREC_DYNAMIC {
        value: i32,
        content: Box<RuleJSON>,
    },
    PREC_LEFT {
        value: i32,
        content: Box<RuleJSON>,
    },
    PREC_RIGHT {
        value: i32,
        content: Box<RuleJSON>,
    },
    PREC {
        value: i32,
        content: Box<RuleJSON>,
    },
    TOKEN {
        content: Box<RuleJSON>,
    },
    IMMEDIATE_TOKEN {
        content: Box<RuleJSON>,
    },
}
|
||||
|
||||
/// Top-level shape of a `grammar.json` document.
#[derive(Deserialize)]
struct GrammarJSON {
    name: String,
    // Rule name -> raw rule JSON; each value is deserialized into a
    // `RuleJSON` individually in `parse_grammar`.
    rules: Map<String, Value>,
    conflicts: Option<Vec<Vec<String>>>,
    externals: Option<Vec<RuleJSON>>,
    extras: Option<Vec<RuleJSON>>,
    inline: Option<Vec<String>>,
    word: Option<String>,
}
|
||||
|
||||
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
||||
let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
|
||||
|
||||
let mut variables = Vec::with_capacity(grammar_json.rules.len());
|
||||
for (name, value) in grammar_json.rules {
|
||||
variables.push(Variable {
|
||||
name: name.to_owned(),
|
||||
kind: VariableType::Named,
|
||||
rule: parse_rule(serde_json::from_value(value)?),
|
||||
})
|
||||
}
|
||||
|
||||
let extra_tokens = grammar_json
|
||||
.extras
|
||||
.unwrap_or(Vec::new())
|
||||
.into_iter()
|
||||
.map(parse_rule)
|
||||
.collect();
|
||||
let external_tokens = grammar_json
|
||||
.externals
|
||||
.unwrap_or(Vec::new())
|
||||
.into_iter()
|
||||
.map(parse_rule)
|
||||
.collect();
|
||||
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
|
||||
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
|
||||
|
||||
Ok(InputGrammar {
|
||||
name: grammar_json.name,
|
||||
word_token: grammar_json.word,
|
||||
variables,
|
||||
extra_tokens,
|
||||
expected_conflicts,
|
||||
external_tokens,
|
||||
variables_to_inline,
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert a deserialized `RuleJSON` tree into the internal `Rule` type.
fn parse_rule(json: RuleJSON) -> Rule {
    match json {
        RuleJSON::ALIAS {
            content,
            value,
            named,
        } => Rule::alias(parse_rule(*content), value, named),
        RuleJSON::BLANK => Rule::Blank,
        RuleJSON::STRING { value } => Rule::String(value),
        RuleJSON::PATTERN { value } => Rule::Pattern(value),
        RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
        RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
        RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
        RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
        // Zero-or-more is desugared as (one-or-more | blank), since
        // `Rule::repeat` itself is the one-or-more form (see REPEAT1 above).
        RuleJSON::REPEAT { content } => {
            Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank])
        }
        RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)),
        RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)),
        RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)),
        RuleJSON::PREC_DYNAMIC { value, content } => {
            Rule::prec_dynamic(value, parse_rule(*content))
        }
        RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
        RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // End-to-end check: JSON text -> InputGrammar with parsed rules.
    #[test]
    fn test_parse_grammar() {
        let grammar = parse_grammar(
            r#"{
            "name": "my_lang",
            "rules": {
                "file": {
                    "type": "REPEAT1",
                    "content": {
                        "type": "SYMBOL",
                        "name": "statement"
                    }
                },
                "statement": {
                    "type": "STRING",
                    "value": "foo"
                }
            }
        }"#,
        )
        .unwrap();

        assert_eq!(grammar.name, "my_lang");
        assert_eq!(
            grammar.variables,
            vec![
                Variable {
                    name: "file".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::repeat(Rule::NamedSymbol("statement".to_string()))
                },
                Variable {
                    name: "statement".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::String("foo".to_string())
                },
            ]
        );
    }
}
|
||||
241
cli/src/generate/prepare_grammar/expand_repeats.rs
Normal file
241
cli/src/generate/prepare_grammar/expand_repeats.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::generate::grammars::{Variable, VariableType};
|
||||
use crate::generate::rules::{Rule, Symbol};
|
||||
use hashbrown::HashMap;
|
||||
use std::mem;
|
||||
|
||||
/// Walks a grammar's rules, replacing every `Repeat` rule with a reference
/// to a generated auxiliary variable.
struct Expander {
    // Name of the variable currently being expanded; used to name the
    // generated `<name>_repeatN` auxiliary variables.
    variable_name: String,
    // How many repeats have been extracted from the current variable so far.
    repeat_count_in_variable: usize,
    // Number of variables preceding the auxiliary ones, so fresh
    // non-terminal symbol indices can be computed.
    preceding_symbol_count: usize,
    // Auxiliary variables generated so far; appended to the grammar at the end.
    auxiliary_variables: Vec<Variable>,
    // Dedup map: repeated inner rule -> symbol of the auxiliary variable
    // already created for it.
    existing_repeats: HashMap<Rule, Symbol>,
}
|
||||
|
||||
impl Expander {
    /// Expand all repeats in one variable's rule, resetting the per-variable
    /// naming state first.
    fn expand_variable(&mut self, variable: &mut Variable) {
        self.variable_name.clear();
        self.variable_name.push_str(&variable.name);
        self.repeat_count_in_variable = 0;
        // Temporarily take ownership of the rule so it can be rebuilt.
        let mut rule = Rule::Blank;
        mem::swap(&mut rule, &mut variable.rule);
        variable.rule = self.expand_rule(&rule);
    }

    /// Recursively rebuild `rule`, replacing each `Repeat` with a symbol
    /// referring to an auxiliary variable.
    fn expand_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
            Rule::Choice(elements) => Rule::Choice(
                elements
                    .iter()
                    .map(|element| self.expand_rule(element))
                    .collect(),
            ),

            Rule::Seq(elements) => Rule::Seq(
                elements
                    .iter()
                    .map(|element| self.expand_rule(element))
                    .collect(),
            ),

            Rule::Repeat(content) => {
                let inner_rule = self.expand_rule(content);

                // Reuse the auxiliary variable if this exact rule was
                // already wrapped in a repeat somewhere else.
                if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) {
                    return Rule::Symbol(*existing_symbol);
                }

                self.repeat_count_in_variable += 1;
                let rule_name = format!(
                    "{}_repeat{}",
                    self.variable_name, self.repeat_count_in_variable
                );
                // The new symbol's index comes after all pre-existing
                // variables plus the auxiliaries generated so far.
                let repeat_symbol = Symbol::non_terminal(
                    self.preceding_symbol_count + self.auxiliary_variables.len(),
                );
                self.existing_repeats
                    .insert(inner_rule.clone(), repeat_symbol);
                // The auxiliary rule is self-recursive:
                //   repeat -> repeat repeat | inner
                self.auxiliary_variables.push(Variable {
                    name: rule_name,
                    kind: VariableType::Auxiliary,
                    rule: Rule::Choice(vec![
                        Rule::Seq(vec![
                            Rule::Symbol(repeat_symbol),
                            Rule::Symbol(repeat_symbol),
                        ]),
                        inner_rule,
                    ]),
                });

                Rule::Symbol(repeat_symbol)
            }

            Rule::Metadata { rule, params } => Rule::Metadata {
                rule: Box::new(self.expand_rule(rule)),
                params: params.clone(),
            },

            // Leaf rules (terminals, symbols, etc.) are left unchanged.
            _ => rule.clone(),
        }
    }
}
|
||||
|
||||
pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
|
||||
let mut expander = Expander {
|
||||
variable_name: String::new(),
|
||||
repeat_count_in_variable: 0,
|
||||
preceding_symbol_count: grammar.variables.len(),
|
||||
auxiliary_variables: Vec::new(),
|
||||
existing_repeats: HashMap::new(),
|
||||
};
|
||||
|
||||
for mut variable in grammar.variables.iter_mut() {
|
||||
expander.expand_variable(&mut variable);
|
||||
}
|
||||
|
||||
grammar
|
||||
.variables
|
||||
.extend(expander.auxiliary_variables.into_iter());
|
||||
grammar
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_repeat_expansion() {
        // Repeats nested inside of sequences and choices are expanded.
        let grammar = expand_repeats(build_grammar(vec![Variable::named(
            "rule0",
            Rule::seq(vec![
                Rule::terminal(10),
                Rule::choice(vec![
                    Rule::repeat(Rule::terminal(11)),
                    Rule::repeat(Rule::terminal(12)),
                ]),
                Rule::terminal(13),
            ]),
        )]));

        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "rule0",
                    Rule::seq(vec![
                        Rule::terminal(10),
                        Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]),
                        Rule::terminal(13),
                    ])
                ),
                Variable::auxiliary(
                    "rule0_repeat1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
                        Rule::terminal(11),
                    ])
                ),
                Variable::auxiliary(
                    "rule0_repeat2",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
                        Rule::terminal(12),
                    ])
                ),
            ]
        );
    }

    #[test]
    fn test_repeat_deduplication() {
        // Terminal 4 appears inside of a repeat in three different places.
        let grammar = expand_repeats(build_grammar(vec![
            Variable::named(
                "rule0",
                Rule::choice(vec![
                    Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]),
                    Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]),
                ]),
            ),
            Variable::named(
                "rule1",
                Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]),
            ),
        ]));

        // Only one auxiliary rule is created for repeating terminal 4.
        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "rule0",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]),
                        Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]),
                    ])
                ),
                Variable::named(
                    "rule1",
                    Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),])
                ),
                Variable::auxiliary(
                    "rule0_repeat1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
                        Rule::terminal(4),
                    ])
                )
            ]
        );
    }

    #[test]
    fn test_expansion_of_nested_repeats() {
        let grammar = expand_repeats(build_grammar(vec![Variable::named(
            "rule0",
            Rule::seq(vec![
                Rule::terminal(10),
                Rule::repeat(Rule::seq(vec![
                    Rule::terminal(11),
                    Rule::repeat(Rule::terminal(12)),
                ])),
            ]),
        )]));

        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "rule0",
                    Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),])
                ),
                Variable::auxiliary(
                    "rule0_repeat1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
                        Rule::terminal(12),
                    ])
                ),
                Variable::auxiliary(
                    "rule0_repeat2",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
                        Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]),
                    ])
                ),
            ]
        );
    }

    // Wrap a variable list in an otherwise-empty grammar.
    fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
        ExtractedSyntaxGrammar {
            variables,
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
        }
    }
}
|
||||
683
cli/src/generate/prepare_grammar/expand_tokens.rs
Normal file
683
cli/src/generate/prepare_grammar/expand_tokens.rs
Normal file
|
|
@ -0,0 +1,683 @@
|
|||
use super::ExtractedLexicalGrammar;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
|
||||
use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
|
||||
use crate::generate::rules::Rule;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use regex_syntax::ast::{
|
||||
parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange,
|
||||
};
|
||||
use std::i32;
|
||||
|
||||
lazy_static! {
    // Matches `{...}` groups whose contents are not a repetition count
    // (i.e. contain something other than digits and commas), so that
    // `preprocess_regex` can escape the braces before parsing.
    static ref CURLY_BRACE_REGEX: Regex =
        Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap();
}

// Escaped characters whose backslash is dropped by `preprocess_regex`
// (JS regexes allow these redundant escapes — TODO confirm that the Rust
// regex parser is the reason they must be stripped).
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
|
||||
|
||||
/// Accumulates NFA states while token rules are being expanded.
struct NfaBuilder {
    nfa: Nfa,
    // Whether the rule currently being expanded is part of the separator
    // rule (built from `grammar.separators`) rather than a token proper.
    is_sep: bool,
    // Precedence values from enclosing Metadata rules; the top entry is
    // the precedence in effect for the rule currently being expanded.
    precedence_stack: Vec<i32>,
}
|
||||
|
||||
fn get_implicit_precedence(rule: &Rule) -> i32 {
|
||||
match rule {
|
||||
Rule::String(_) => 2,
|
||||
Rule::Metadata { rule, params } => {
|
||||
if params.is_main_token {
|
||||
get_implicit_precedence(rule) + 1
|
||||
} else {
|
||||
get_implicit_precedence(rule)
|
||||
}
|
||||
}
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_completion_precedence(rule: &Rule) -> i32 {
|
||||
match rule {
|
||||
Rule::Metadata { params, .. } => params.precedence.unwrap_or(0),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn preprocess_regex(content: &str) -> String {
|
||||
let content = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}");
|
||||
let mut result = String::with_capacity(content.len());
|
||||
let mut is_escaped = false;
|
||||
for c in content.chars() {
|
||||
if is_escaped {
|
||||
if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
|
||||
result.push(c);
|
||||
} else {
|
||||
result.push('\\');
|
||||
result.push(c);
|
||||
}
|
||||
is_escaped = false;
|
||||
} else if c == '\\' {
|
||||
is_escaped = true;
|
||||
} else {
|
||||
result.push(c);
|
||||
}
|
||||
}
|
||||
if is_escaped {
|
||||
result.push('\\');
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Expand every token rule in the lexical grammar into NFA states,
/// producing one `LexicalVariable` (with its start state) per token.
pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
    let mut builder = NfaBuilder {
        nfa: Nfa::new(),
        is_sep: true,
        precedence_stack: vec![0],
    };

    // All separators are folded into a single repeated-choice rule that is
    // appended after each (non-immediate) token.
    let separator_rule = if grammar.separators.len() > 0 {
        grammar.separators.push(Rule::Blank);
        Rule::repeat(Rule::choice(grammar.separators))
    } else {
        Rule::Blank
    };

    let mut variables = Vec::new();
    for (i, variable) in grammar.variables.into_iter().enumerate() {
        // Immediate tokens are matched without any preceding separator.
        let is_immediate_token = match &variable.rule {
            Rule::Metadata { params, .. } => params.is_main_token,
            _ => false,
        };

        builder.is_sep = false;
        // States are built back-to-front: the Accept state first, then the
        // rule's states leading into it.
        builder.nfa.states.push(NfaState::Accept {
            variable_index: i,
            precedence: get_completion_precedence(&variable.rule),
        });
        let last_state_id = builder.nfa.last_state_id();
        builder
            .expand_rule(&variable.rule, last_state_id)
            .map_err(|Error(msg)| Error(format!("Rule {} {}", variable.name, msg)))?;

        if !is_immediate_token {
            // Prefix the token with the separator rule (leading whitespace etc.).
            builder.is_sep = true;
            let last_state_id = builder.nfa.last_state_id();
            builder.expand_rule(&separator_rule, last_state_id)?;
        }

        variables.push(LexicalVariable {
            name: variable.name,
            kind: variable.kind,
            implicit_precedence: get_implicit_precedence(&variable.rule),
            // The most recently created state is where matching begins.
            start_state: builder.nfa.last_state_id(),
        });
    }

    Ok(LexicalGrammar {
        nfa: builder.nfa,
        variables,
    })
}
|
||||
|
||||
impl NfaBuilder {
|
||||
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
|
||||
match rule {
|
||||
Rule::Pattern(s) => {
|
||||
let s = preprocess_regex(s);
|
||||
let ast = parse::Parser::new()
|
||||
.parse(&s)
|
||||
.map_err(|e| Error(e.to_string()))?;
|
||||
self.expand_regex(&ast, next_state_id)
|
||||
}
|
||||
Rule::String(s) => {
|
||||
for c in s.chars().rev() {
|
||||
self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
|
||||
next_state_id = self.nfa.last_state_id();
|
||||
}
|
||||
Ok(s.len() > 0)
|
||||
}
|
||||
Rule::Choice(elements) => {
|
||||
let mut alternative_state_ids = Vec::new();
|
||||
for element in elements {
|
||||
if self.expand_rule(element, next_state_id)? {
|
||||
alternative_state_ids.push(self.nfa.last_state_id());
|
||||
} else {
|
||||
alternative_state_ids.push(next_state_id);
|
||||
}
|
||||
}
|
||||
alternative_state_ids.sort_unstable();
|
||||
alternative_state_ids.dedup();
|
||||
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
|
||||
for alternative_state_id in alternative_state_ids {
|
||||
self.push_split(alternative_state_id);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
Rule::Seq(elements) => {
|
||||
let mut result = false;
|
||||
for element in elements.into_iter().rev() {
|
||||
if self.expand_rule(element, next_state_id)? {
|
||||
result = true;
|
||||
}
|
||||
next_state_id = self.nfa.last_state_id();
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
Rule::Repeat(rule) => {
|
||||
self.nfa.states.push(NfaState::Accept {
|
||||
variable_index: 0,
|
||||
precedence: 0,
|
||||
}); // Placeholder for split
|
||||
let split_state_id = self.nfa.last_state_id();
|
||||
if self.expand_rule(rule, split_state_id)? {
|
||||
self.nfa.states[split_state_id as usize] =
|
||||
NfaState::Split(self.nfa.last_state_id(), next_state_id);
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
Rule::Metadata { rule, params } => {
|
||||
if let Some(precedence) = params.precedence {
|
||||
self.precedence_stack.push(precedence);
|
||||
}
|
||||
let result = self.expand_rule(rule, next_state_id);
|
||||
if params.precedence.is_some() {
|
||||
self.precedence_stack.pop();
|
||||
}
|
||||
result
|
||||
}
|
||||
Rule::Blank => Ok(false),
|
||||
_ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))),
|
||||
}
|
||||
}
|
||||
|
||||
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
|
||||
match ast {
|
||||
Ast::Empty(_) => Ok(false),
|
||||
Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
|
||||
Ast::Literal(literal) => {
|
||||
self.push_advance(CharacterSet::Include(vec![literal.c]), next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
Ast::Dot(_) => {
|
||||
self.push_advance(CharacterSet::Exclude(vec!['\n']), next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")),
|
||||
Ast::Class(class) => match class {
|
||||
Class::Unicode(_) => {
|
||||
Err(Error::regex("Unicode character classes are not supported"))
|
||||
}
|
||||
Class::Perl(class) => {
|
||||
let mut chars = self.expand_perl_character_class(&class.kind);
|
||||
if class.negated {
|
||||
chars = chars.negate();
|
||||
}
|
||||
self.push_advance(chars, next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
Class::Bracketed(class) => match &class.kind {
|
||||
ClassSet::Item(item) => {
|
||||
let mut chars = self.expand_character_class(&item)?;
|
||||
if class.negated {
|
||||
chars = chars.negate();
|
||||
}
|
||||
self.push_advance(chars, next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
ClassSet::BinaryOp(_) => Err(Error::regex(
|
||||
"Binary operators in character classes aren't supported",
|
||||
)),
|
||||
},
|
||||
},
|
||||
Ast::Repetition(repetition) => match repetition.op.kind {
|
||||
RepetitionKind::ZeroOrOne => {
|
||||
self.expand_zero_or_one(&repetition.ast, next_state_id)
|
||||
}
|
||||
RepetitionKind::OneOrMore => {
|
||||
self.expand_one_or_more(&repetition.ast, next_state_id)
|
||||
}
|
||||
RepetitionKind::ZeroOrMore => {
|
||||
self.expand_zero_or_more(&repetition.ast, next_state_id)
|
||||
}
|
||||
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
|
||||
self.expand_count(&repetition.ast, count, next_state_id)
|
||||
}
|
||||
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
|
||||
if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
|
||||
self.expand_count(&repetition.ast, min, next_state_id)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
|
||||
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
|
||||
for _ in min..max {
|
||||
if result {
|
||||
next_state_id = self.nfa.last_state_id();
|
||||
}
|
||||
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
},
|
||||
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
|
||||
Ast::Alternation(alternation) => {
|
||||
let mut alternative_state_ids = Vec::new();
|
||||
for ast in alternation.asts.iter() {
|
||||
if self.expand_regex(&ast, next_state_id)? {
|
||||
alternative_state_ids.push(self.nfa.last_state_id());
|
||||
} else {
|
||||
alternative_state_ids.push(next_state_id);
|
||||
}
|
||||
}
|
||||
alternative_state_ids.sort_unstable();
|
||||
alternative_state_ids.dedup();
|
||||
alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
|
||||
|
||||
for alternative_state_id in alternative_state_ids {
|
||||
self.push_split(alternative_state_id);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
Ast::Concat(concat) => {
|
||||
let mut result = false;
|
||||
for ast in concat.asts.iter().rev() {
|
||||
if self.expand_regex(&ast, next_state_id)? {
|
||||
result = true;
|
||||
next_state_id = self.nfa.last_state_id();
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expand `ast` one-or-more times (`+`). A placeholder state is pushed
/// first so the body's expansion has a fixed id to loop back to; once the
/// body is built, the placeholder is overwritten with a Split that either
/// re-enters the body or proceeds to `next_state_id`.
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
    self.nfa.states.push(NfaState::Accept {
        variable_index: 0,
        precedence: 0,
    }); // Placeholder for split
    let split_state_id = self.nfa.last_state_id();
    if self.expand_regex(&ast, split_state_id)? {
        self.nfa.states[split_state_id as usize] =
            NfaState::Split(self.nfa.last_state_id(), next_state_id);
        Ok(true)
    } else {
        // Body matched only the empty string; discard the unused placeholder.
        self.nfa.states.pop();
        Ok(false)
    }
}
|
||||
|
||||
/// Expand `ast` zero-or-one times (`?`): build the body, then add a Split
/// that allows skipping straight to `next_state_id`.
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
    let expanded = self.expand_regex(ast, next_state_id)?;
    if expanded {
        self.push_split(next_state_id);
    }
    Ok(expanded)
}
|
||||
|
||||
/// Expand `ast` zero-or-more times (`*`): `+` made optional. Build the
/// one-or-more loop, then add a Split that allows bypassing it entirely.
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
    let looped = self.expand_one_or_more(&ast, next_state_id)?;
    if looped {
        self.push_split(next_state_id);
    }
    Ok(looped)
}
|
||||
|
||||
/// Expand exactly `count` chained copies of `ast` (`{n}`), each copy
/// targeting the entry state of the previously-built one. Returns whether
/// any copy consumed input.
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
    let mut expanded_any = false;
    let mut remaining = count;
    while remaining > 0 {
        remaining -= 1;
        if self.expand_regex(ast, next_state_id)? {
            expanded_any = true;
            next_state_id = self.nfa.last_state_id();
        }
    }
    Ok(expanded_any)
}
|
||||
|
||||
fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
|
||||
match item {
|
||||
ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
|
||||
ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
|
||||
ClassSetItem::Range(range) => {
|
||||
Ok(CharacterSet::empty().add_range(range.start.c, range.end.c))
|
||||
}
|
||||
ClassSetItem::Union(union) => {
|
||||
let mut result = CharacterSet::empty();
|
||||
for item in &union.items {
|
||||
result = result.add(&self.expand_character_class(&item)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
ClassSetItem::Perl(class) => Ok(self.expand_perl_character_class(&class.kind)),
|
||||
_ => Err(Error::regex(&format!(
|
||||
"Unsupported character class syntax {:?}",
|
||||
item
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate a Perl-style class (`\d`, `\s`, `\w`) into an explicit
/// `CharacterSet`. These are the ASCII interpretations only — no Unicode
/// categories are included.
fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
    match item {
        // \d
        ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'),
        // \s — space, tab, CR, LF
        ClassPerlKind::Space => CharacterSet::empty()
            .add_char(' ')
            .add_char('\t')
            .add_char('\r')
            .add_char('\n'),
        // \w — underscore plus ASCII alphanumerics
        ClassPerlKind::Word => CharacterSet::empty()
            .add_char('_')
            .add_range('A', 'Z')
            .add_range('a', 'z')
            .add_range('0', '9'),
    }
}
|
||||
|
||||
/// Push an Advance state that consumes any character in `chars` and moves
/// to `state_id`, tagged with the current precedence and separator flag.
///
/// Precondition: `self.precedence_stack` is non-empty — the `unwrap`
/// panics otherwise, so a base precedence must be pushed before expansion
/// begins.
fn push_advance(&mut self, chars: CharacterSet, state_id: u32) {
    let precedence = *self.precedence_stack.last().unwrap();
    self.nfa.states.push(NfaState::Advance {
        chars,
        state_id,
        precedence,
        is_sep: self.is_sep,
    });
}
|
||||
|
||||
fn push_split(&mut self, state_id: u32) {
|
||||
let last_state_id = self.nfa.last_state_id();
|
||||
self.nfa
|
||||
.states
|
||||
.push(NfaState::Split(state_id, last_state_id));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::Variable;
|
||||
use crate::generate::nfa::{NfaCursor, NfaTransition};
|
||||
|
||||
/// Test helper: run the lexical grammar's NFA over `s` and return the
/// longest/highest-precedence match as `(variable_index, matched_text)`,
/// or `None` if no token completes. Separator transitions advance the
/// match start so leading separators are excluded from the matched slice.
///
/// NOTE(review): `start_char`/`end_char` count `char`s but are used as
/// byte indices into `s` — only equivalent for ASCII inputs, which is all
/// the tests use. Confirm before reusing on non-ASCII data.
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
    let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
    let mut cursor = NfaCursor::new(&grammar.nfa, start_states);

    let mut result = None;
    let mut result_precedence = i32::MIN;
    let mut start_char = 0;
    let mut end_char = 0;
    for c in s.chars() {
        // Record any token that is complete before consuming `c`;
        // later (longer) completions of equal or higher precedence win.
        for (id, precedence) in cursor.completions() {
            if result.is_none() || result_precedence <= precedence {
                result = Some((id, &s[start_char..end_char]));
                result_precedence = precedence;
            }
        }
        // Take the first transition that accepts `c` and is not
        // out-precedenced by an already-recorded completion.
        if let Some(NfaTransition {
            states,
            is_separator,
            ..
        }) = cursor
            .transitions()
            .into_iter()
            .find(|t| t.characters.contains(c) && t.precedence >= result_precedence)
        {
            cursor.reset(states);
            end_char += 1;
            if is_separator {
                // Separators are consumed but excluded from the match.
                start_char = end_char;
            }
        } else {
            break;
        }
    }

    // Check for completions at end-of-input as well.
    for (id, precedence) in cursor.completions() {
        if result.is_none() || result_precedence <= precedence {
            result = Some((id, &s[start_char..end_char]));
            result_precedence = precedence;
        }
    }

    result
}
|
||||
|
||||
#[test]
// Table-driven end-to-end test: each Row's rules/separators are expanded
// into an NFA via `expand_tokens`, then `simulate_nfa` is run on each
// example input and must produce the expected (variable index, match).
fn test_rule_expansion() {
    struct Row {
        rules: Vec<Rule>,          // one lexical variable per rule
        separators: Vec<Rule>,     // skipped-but-consumed separator rules
        examples: Vec<(&'static str, Option<(usize, &'static str)>)>,
    }

    let table = [
        // regex with sequences and alternatives
        Row {
            rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
            separators: vec![],
            examples: vec![
                ("ade1", Some((0, "ade"))),
                ("bdf1", Some((0, "bdf"))),
                ("bdfh1", Some((0, "bdfh"))),
                ("ad1", None),
            ],
        },
        // regex with repeats
        Row {
            rules: vec![Rule::pattern("a*")],
            separators: vec![],
            examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
        },
        // regex with repeats in sequences
        Row {
            rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
            separators: vec![],
            examples: vec![
                ("af1", Some((0, "af"))),
                ("adedef1", Some((0, "adedef"))),
                ("abcbcbcf1", Some((0, "abcbcbcf"))),
                ("a", None),
            ],
        },
        // regex with character ranges
        Row {
            rules: vec![Rule::pattern("[a-fA-F0-9]+")],
            separators: vec![],
            examples: vec![("A1ff0.", Some((0, "A1ff0")))],
        },
        // regex with perl character classes
        Row {
            rules: vec![Rule::pattern("\\w\\d\\s")],
            separators: vec![],
            examples: vec![("_0 ", Some((0, "_0 ")))],
        },
        // string
        Row {
            rules: vec![Rule::string("abc")],
            separators: vec![],
            examples: vec![("abcd", Some((0, "abc"))), ("ab", None)],
        },
        // complex rule containing strings and regexes
        Row {
            rules: vec![Rule::repeat(Rule::seq(vec![
                Rule::string("{"),
                Rule::pattern("[a-f]+"),
                Rule::string("}"),
            ]))],
            separators: vec![],
            examples: vec![
                ("{a}{", Some((0, "{a}"))),
                ("{a}{d", Some((0, "{a}"))),
                ("ab", None),
            ],
        },
        // longest match rule
        Row {
            rules: vec![
                Rule::pattern("a|bc"),
                Rule::pattern("aa"),
                Rule::pattern("bcd"),
            ],
            separators: vec![],
            examples: vec![
                ("a.", Some((0, "a"))),
                ("bc.", Some((0, "bc"))),
                ("aa.", Some((1, "aa"))),
                ("bcd?", Some((2, "bcd"))),
                ("b.", None),
                ("c.", None),
            ],
        },
        // regex with an alternative including the empty string
        Row {
            rules: vec![Rule::pattern("a(b|)+c")],
            separators: vec![],
            examples: vec![
                ("ac.", Some((0, "ac"))),
                ("abc.", Some((0, "abc"))),
                ("abbc.", Some((0, "abbc"))),
            ],
        },
        // separators
        Row {
            rules: vec![Rule::pattern("[a-f]+")],
            separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
            examples: vec![
                (" a", Some((0, "a"))),
                (" \nb", Some((0, "b"))),
                (" \\a", None),
                (" \\\na", Some((0, "a"))),
            ],
        },
        // shorter tokens with higher precedence
        Row {
            rules: vec![
                Rule::prec(2, Rule::pattern("abc")),
                Rule::prec(1, Rule::pattern("ab[cd]e")),
                Rule::pattern("[a-e]+"),
            ],
            separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
            examples: vec![
                ("abceef", Some((0, "abc"))),
                ("abdeef", Some((1, "abde"))),
                ("aeeeef", Some((2, "aeeee"))),
            ],
        },
        // immediate tokens with higher precedence
        Row {
            rules: vec![
                Rule::prec(1, Rule::pattern("[^a]+")),
                Rule::immediate_token(Rule::prec(2, Rule::pattern("[^ab]+"))),
            ],
            separators: vec![Rule::pattern("\\s")],
            examples: vec![("cccb", Some((1, "ccc")))],
        },
        // choices nested inside a sequence of strings
        Row {
            rules: vec![Rule::seq(vec![
                Rule::string("a"),
                Rule::choice(vec![Rule::string("b"), Rule::string("c")]),
                Rule::string("d"),
            ])],
            separators: vec![],
            examples: vec![
                ("abd", Some((0, "abd"))),
                ("acd", Some((0, "acd"))),
                ("abc", None),
                ("ad", None),
                ("d", None),
                ("a", None),
            ],
        },
        // nested choices within sequences
        Row {
            rules: vec![Rule::seq(vec![
                Rule::pattern("[0-9]+"),
                Rule::choice(vec![
                    Rule::Blank,
                    Rule::choice(vec![Rule::seq(vec![
                        Rule::choice(vec![Rule::string("e"), Rule::string("E")]),
                        Rule::choice(vec![
                            Rule::Blank,
                            Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
                        ]),
                        Rule::pattern("[0-9]+"),
                    ])]),
                ]),
            ])],
            separators: vec![],
            examples: vec![
                ("12", Some((0, "12"))),
                ("12e", Some((0, "12"))),
                ("12g", Some((0, "12"))),
                ("12e3", Some((0, "12e3"))),
                ("12e+", Some((0, "12"))),
                ("12E+34 +", Some((0, "12E+34"))),
                ("12e34", Some((0, "12e34"))),
            ],
        },
        // nested groups
        Row {
            rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
            separators: vec![],
            examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
        },
        // allowing unrecognized escape sequences
        Row {
            rules: vec![
                // Escaped forward slash (used in JS because '/' is the regex delimiter)
                Rule::pattern(r#"\/"#),
                // Escaped quotes
                Rule::pattern(r#"\"\'"#),
                // Quote preceded by a literal backslash
                Rule::pattern(r#"[\\']+"#),
            ],
            separators: vec![],
            examples: vec![
                ("/", Some((0, "/"))),
                ("\"\'", Some((1, "\"\'"))),
                (r#"'\'a"#, Some((2, r#"'\'"#))),
            ],
        },
        // allowing un-escaped curly braces
        Row {
            rules: vec![
                // Un-escaped curly braces
                Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
                // Already-escaped curly braces
                Rule::pattern(r#"\{[ab]{3}\}"#),
            ],
            separators: vec![],
            examples: vec![
                ("u{1234} ok", Some((0, "u{1234}"))),
                ("{aba}}", Some((1, "{aba}"))),
            ],
        },
    ];

    for Row {
        rules,
        separators,
        examples,
    } in &table
    {
        // Build one NFA containing all of this row's variables.
        let grammar = expand_tokens(ExtractedLexicalGrammar {
            separators: separators.clone(),
            variables: rules
                .into_iter()
                .map(|rule| Variable::named("", rule.clone()))
                .collect(),
        })
        .unwrap();

        for (haystack, needle) in examples.iter() {
            assert_eq!(simulate_nfa(&grammar, haystack), *needle);
        }
    }
}
|
||||
}
|
||||
222
cli/src/generate/prepare_grammar/extract_simple_aliases.rs
Normal file
222
cli/src/generate/prepare_grammar/extract_simple_aliases.rs
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
|
||||
/// Per-symbol bookkeeping used while scanning all productions: tracks
/// whether every usage of the symbol seen so far carries the same alias.
#[derive(Clone, Default)]
struct SymbolStatus {
    // The single alias used everywhere for this symbol, if one exists.
    alias: Option<Alias>,
    // Set once two usages disagree, or one usage has no alias at all;
    // once true, `alias` stays `None`.
    conflicting: bool,
}
|
||||
|
||||
/// Find symbols that are aliased to the *same* name at every one of their
/// usage sites ("simple" aliases), strip those per-step aliases out of the
/// syntax grammar, and return them as a symbol → alias map so the alias can
/// be applied globally instead of per-production.
///
/// Panics if any production step refers to the End symbol.
pub(super) fn extract_simple_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    // Determine which symbols in the grammars are *always* aliased to a single name.
    let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_status_list =
        vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_status_list =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
    for variable in syntax_grammar.variables.iter() {
        for production in variable.productions.iter() {
            for step in production.steps.iter() {
                // Each symbol kind is tracked in its own status list,
                // indexed by the symbol's index.
                let mut status = match step.symbol {
                    Symbol {
                        kind: SymbolType::External,
                        index,
                    } => &mut external_status_list[index],
                    Symbol {
                        kind: SymbolType::NonTerminal,
                        index,
                    } => &mut non_terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::Terminal,
                        index,
                    } => &mut terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::End,
                        ..
                    } => panic!("Unexpected end token"),
                };

                // An un-aliased usage immediately disqualifies the symbol.
                if step.alias.is_none() {
                    status.alias = None;
                    status.conflicting = true;
                }

                if !status.conflicting {
                    if status.alias.is_none() {
                        // First aliased usage: remember the alias.
                        status.alias = step.alias.clone();
                    } else if status.alias != step.alias {
                        // Two different aliases: disqualify.
                        status.alias = None;
                        status.conflicting = true;
                    }
                }
            }
        }
    }

    // Remove the aliases for those symbols.
    for variable in syntax_grammar.variables.iter_mut() {
        for production in variable.productions.iter_mut() {
            for step in production.steps.iter_mut() {
                let status = match step.symbol {
                    Symbol {
                        kind: SymbolType::External,
                        index,
                    } => &external_status_list[index],
                    Symbol {
                        kind: SymbolType::NonTerminal,
                        index,
                    } => &non_terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::Terminal,
                        index,
                    } => &terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::End,
                        ..
                    } => panic!("Unexpected end token"),
                };

                // Only steps whose symbol got a simple alias are cleared;
                // conflicting symbols keep their per-step aliases.
                if status.alias.is_some() {
                    step.alias = None;
                }
            }
        }
    }

    // Populate a map of the symbols to their aliases.
    let mut result = AliasMap::new();
    for (i, status) in terminal_status_list.into_iter().enumerate() {
        if let Some(alias) = status.alias {
            result.insert(Symbol::terminal(i), alias);
        }
    }
    for (i, status) in non_terminal_status_list.into_iter().enumerate() {
        if let Some(alias) = status.alias {
            result.insert(Symbol::non_terminal(i), alias);
        }
    }
    for (i, status) in external_status_list.into_iter().enumerate() {
        if let Some(alias) = status.alias {
            result.insert(Symbol::external(i), alias);
        }
    }
    result
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
use crate::generate::nfa::Nfa;
|
||||
|
||||
#[test]
// Terminal 0 is aliased "a1" at every usage (simple alias → extracted and
// stripped); terminal 1 has one un-aliased usage and terminal 2 two
// different aliases, so both keep their per-step aliases.
fn test_extract_simple_aliases() {
    let mut syntax_grammar = SyntaxGrammar {
        variables: vec![
            SyntaxVariable {
                name: "v1".to_owned(),
                kind: VariableType::Named,
                productions: vec![Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                        ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                        ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                    ],
                }],
            },
            SyntaxVariable {
                name: "v2".to_owned(),
                kind: VariableType::Named,
                productions: vec![Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        // Token 0 is always aliased as "a1".
                        ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                        // Token 1 is aliased above, but not here.
                        ProductionStep::new(Symbol::terminal(1)),
                        // Token 2 is aliased differently than above.
                        ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
                    ],
                }],
            },
        ],
        extra_tokens: Vec::new(),
        expected_conflicts: Vec::new(),
        variables_to_inline: Vec::new(),
        external_tokens: Vec::new(),
        word_token: None,
    };

    // Lexical grammar only needs the right number of variables; the NFA
    // content is irrelevant to alias extraction.
    let lexical_grammar = LexicalGrammar {
        nfa: Nfa::new(),
        variables: vec![
            LexicalVariable {
                name: "t1".to_string(),
                kind: VariableType::Anonymous,
                implicit_precedence: 0,
                start_state: 0,
            },
            LexicalVariable {
                name: "t2".to_string(),
                kind: VariableType::Anonymous,
                implicit_precedence: 0,
                start_state: 0,
            },
            LexicalVariable {
                name: "t3".to_string(),
                kind: VariableType::Anonymous,
                implicit_precedence: 0,
                start_state: 0,
            },
        ],
    };

    let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
    // Only terminal 0 qualifies as a simple alias.
    assert_eq!(simple_aliases.len(), 1);
    assert_eq!(
        simple_aliases[&Symbol::terminal(0)],
        Alias {
            value: "a1".to_string(),
            is_named: true,
        }
    );

    assert_eq!(
        syntax_grammar.variables,
        vec![
            SyntaxVariable {
                name: "v1".to_owned(),
                kind: VariableType::Named,
                productions: vec![Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        // 'Simple' alias removed
                        ProductionStep::new(Symbol::terminal(0)),
                        // Other aliases unchanged
                        ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                        ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                    ],
                },],
            },
            SyntaxVariable {
                name: "v2".to_owned(),
                kind: VariableType::Named,
                productions: vec![Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(0)),
                        ProductionStep::new(Symbol::terminal(1)),
                        ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
                    ],
                },],
            },
        ]
    );
}
|
||||
}
|
||||
525
cli/src/generate/prepare_grammar/extract_tokens.rs
Normal file
525
cli/src/generate/prepare_grammar/extract_tokens.rs
Normal file
|
|
@ -0,0 +1,525 @@
|
|||
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{ExternalToken, Variable, VariableType};
|
||||
use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType};
|
||||
use hashbrown::HashMap;
|
||||
use std::mem;
|
||||
|
||||
/// Split an interned grammar into a syntax grammar and a lexical grammar:
/// every string, pattern, and `token(...)`-wrapped rule is pulled out into
/// a lexical variable, and the syntax rules are rewritten to reference the
/// extracted tokens by terminal symbol.
///
/// Errors if a non-terminal ends up used as an extra token, an external
/// token, or the word token, or if an external token is not a symbol rule.
pub(super) fn extract_tokens(
    mut grammar: InternedGrammar,
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
    let mut extractor = TokenExtractor {
        current_variable_name: String::new(),
        current_variable_token_count: 0,
        extracted_variables: Vec::new(),
        extracted_usage_counts: Vec::new(),
    };

    // Replace token-like sub-rules with terminal symbols, accumulating the
    // extracted rules (and their usage counts) inside the extractor.
    for mut variable in grammar.variables.iter_mut() {
        extractor.extract_tokens_in_variable(&mut variable);
    }

    for mut variable in grammar.external_tokens.iter_mut() {
        extractor.extract_tokens_in_variable(&mut variable);
    }

    let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
    for variable in extractor.extracted_variables {
        lexical_variables.push(Variable {
            name: variable.name,
            kind: variable.kind,
            rule: variable.rule,
        });
    }

    // If a variable's entire rule was extracted as a token and that token didn't
    // appear within any other rule, then remove that variable from the syntax
    // grammar, giving its name to the token in the lexical grammar. Any symbols
    // that pointed to that variable will need to be updated to point to the
    // variable in the lexical grammar. Symbols that pointed to later variables
    // will need to have their indices decremented.
    let mut variables = Vec::new();
    let mut symbol_replacer = SymbolReplacer {
        replacements: HashMap::new(),
    };
    for (i, variable) in grammar.variables.into_iter().enumerate() {
        if let Rule::Symbol(Symbol {
            kind: SymbolType::Terminal,
            index,
        }) = variable.rule
        {
            // `i > 0` keeps the grammar's start variable in the syntax grammar.
            if i > 0 && extractor.extracted_usage_counts[index] == 1 {
                let mut lexical_variable = &mut lexical_variables[index];
                lexical_variable.kind = variable.kind;
                lexical_variable.name = variable.name;
                symbol_replacer.replacements.insert(i, index);
                continue;
            }
        }
        variables.push(variable);
    }

    for variable in variables.iter_mut() {
        variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
    }

    // Re-map conflict symbol lists through the replacements, deduplicating.
    let expected_conflicts = grammar
        .expected_conflicts
        .into_iter()
        .map(|conflict| {
            let mut result: Vec<_> = conflict
                .iter()
                .map(|symbol| symbol_replacer.replace_symbol(*symbol))
                .collect();
            result.sort_unstable();
            result.dedup();
            result
        })
        .collect();

    let variables_to_inline = grammar
        .variables_to_inline
        .into_iter()
        .map(|symbol| symbol_replacer.replace_symbol(symbol))
        .collect();

    // Partition the `extras`: symbols become extra tokens; anonymous rules
    // matching an extracted token reuse it; everything else is a separator.
    let mut separators = Vec::new();
    let mut extra_tokens = Vec::new();
    for rule in grammar.extra_tokens {
        if let Rule::Symbol(symbol) = rule {
            let new_symbol = symbol_replacer.replace_symbol(symbol);
            if new_symbol.is_non_terminal() {
                return Err(Error(format!(
                    "Non-token symbol '{}' cannot be used as an extra token",
                    &variables[new_symbol.index].name
                )));
            } else {
                extra_tokens.push(new_symbol);
            }
        } else {
            if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
                extra_tokens.push(Symbol::terminal(index));
            } else {
                separators.push(rule);
            }
        }
    }

    // External tokens must resolve to a symbol; link each to its internal
    // token (same name) when one exists.
    let mut external_tokens = Vec::new();
    for external_token in grammar.external_tokens {
        let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
        if let Rule::Symbol(symbol) = rule {
            if symbol.is_non_terminal() {
                return Err(Error(format!(
                    "Rule '{}' cannot be used as both an external token and a non-terminal rule",
                    &variables[symbol.index].name,
                )));
            }

            if symbol.is_external() {
                external_tokens.push(ExternalToken {
                    name: external_token.name,
                    kind: external_token.kind,
                    corresponding_internal_token: None,
                })
            } else {
                external_tokens.push(ExternalToken {
                    name: lexical_variables[symbol.index].name.clone(),
                    kind: external_token.kind,
                    corresponding_internal_token: Some(symbol),
                })
            }
        } else {
            return Err(Error(format!(
                "Non-symbol rules cannot be used as external tokens"
            )));
        }
    }

    let mut word_token = None;
    if let Some(token) = grammar.word_token {
        let token = symbol_replacer.replace_symbol(token);
        if token.is_non_terminal() {
            return Err(Error(format!(
                "Non-terminal symbol '{}' cannot be used as the word token",
                &variables[token.index].name
            )));
        }
        word_token = Some(token);
    }

    Ok((
        ExtractedSyntaxGrammar {
            variables,
            expected_conflicts,
            extra_tokens,
            variables_to_inline,
            external_tokens,
            word_token,
        },
        ExtractedLexicalGrammar {
            variables: lexical_variables,
            separators,
        },
    ))
}
|
||||
|
||||
/// Walks syntax rules, moving token-like sub-rules into a separate list of
/// lexical variables and tracking how often each extracted rule is reused.
struct TokenExtractor {
    // Name of the syntax variable currently being processed; used to name
    // auxiliary tokens extracted from patterns.
    current_variable_name: String,
    // Number of anonymous tokens extracted from the current variable so far.
    current_variable_token_count: usize,
    // The extracted lexical variables, in extraction order.
    extracted_variables: Vec<Variable>,
    // Parallel to `extracted_variables`: how many rule sites reference each.
    extracted_usage_counts: Vec<usize>,
}
|
||||
|
||||
/// Rewrites symbols after some syntax variables were removed in favor of
/// lexical tokens: maps removed non-terminal index → terminal index.
struct SymbolReplacer {
    replacements: HashMap<usize, usize>,
}
|
||||
|
||||
impl TokenExtractor {
    /// Rewrite one variable's rule in place, extracting token sub-rules.
    /// Resets the per-variable naming state first.
    fn extract_tokens_in_variable(&mut self, variable: &mut Variable) {
        self.current_variable_name.clear();
        self.current_variable_name.push_str(&variable.name);
        self.current_variable_token_count = 0;
        // Temporarily swap the rule out so it can be consumed by reference
        // while the variable stays valid.
        let mut rule = Rule::Blank;
        mem::swap(&mut rule, &mut variable.rule);
        variable.rule = self.extract_tokens_in_rule(&rule);
    }

    /// Recursively rewrite `input`, replacing strings, patterns, and
    /// `token(...)`-marked sub-rules with terminal symbols.
    fn extract_tokens_in_rule(&mut self, input: &Rule) -> Rule {
        match input {
            Rule::String(name) => self.extract_token(input, Some(name)).into(),
            Rule::Pattern(..) => self.extract_token(input, None).into(),
            Rule::Metadata { params, rule } => {
                if params.is_token {
                    // Strip the `is_token` marker before deciding whether the
                    // metadata wrapper carries any other information.
                    let mut params = params.clone();
                    params.is_token = false;

                    let mut string_value = None;
                    if let Rule::String(value) = rule.as_ref() {
                        string_value = Some(value);
                    }

                    // If `token(...)` carried nothing but the marker, extract
                    // the inner rule; otherwise keep the wrapper in the token.
                    let rule_to_extract = if params == MetadataParams::default() {
                        rule.as_ref()
                    } else {
                        input
                    };

                    self.extract_token(rule_to_extract, string_value).into()
                } else {
                    Rule::Metadata {
                        params: params.clone(),
                        rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
                    }
                }
            }
            Rule::Repeat(content) => Rule::Repeat(Box::new(self.extract_tokens_in_rule(content))),
            Rule::Seq(elements) => Rule::Seq(
                elements
                    .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
                    .collect(),
            ),
            Rule::Choice(elements) => Rule::Choice(
                elements
                    .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
                    .collect(),
            ),
            // Symbols, blanks, etc. are left untouched.
            _ => input.clone(),
        }
    }

    /// Return the terminal symbol for `rule`, reusing an already-extracted
    /// identical rule when possible. String tokens are named after their
    /// content (anonymous); pattern tokens get an auxiliary
    /// `<variable>_token<N>` name.
    fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Symbol {
        // Deduplicate: identical rules share one lexical variable.
        for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
            if variable.rule == *rule {
                self.extracted_usage_counts[i] += 1;
                return Symbol::terminal(i);
            }
        }

        let index = self.extracted_variables.len();
        let variable = if let Some(string_value) = string_value {
            Variable {
                name: string_value.clone(),
                kind: VariableType::Anonymous,
                rule: rule.clone(),
            }
        } else {
            self.current_variable_token_count += 1;
            Variable {
                name: format!(
                    "{}_token{}",
                    &self.current_variable_name, self.current_variable_token_count
                ),
                kind: VariableType::Auxiliary,
                rule: rule.clone(),
            }
        };

        self.extracted_variables.push(variable);
        self.extracted_usage_counts.push(1);
        Symbol::terminal(index)
    }
}
|
||||
|
||||
impl SymbolReplacer {
    /// Recursively apply `replace_symbol` to every symbol in `rule`.
    fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
            Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
            Rule::Choice(elements) => Rule::Choice(
                elements
                    .iter()
                    .map(|e| self.replace_symbols_in_rule(e))
                    .collect(),
            ),
            Rule::Seq(elements) => Rule::Seq(
                elements
                    .iter()
                    .map(|e| self.replace_symbols_in_rule(e))
                    .collect(),
            ),
            Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))),
            Rule::Metadata { rule, params } => Rule::Metadata {
                params: params.clone(),
                rule: Box::new(self.replace_symbols_in_rule(rule)),
            },
            _ => rule.clone(),
        }
    }

    /// Map a non-terminal symbol through the replacements: symbols whose
    /// variable became a token turn into that terminal; the rest have their
    /// indices decremented once per removed variable that preceded them.
    fn replace_symbol(&self, symbol: Symbol) -> Symbol {
        // Terminals and externals are unaffected by variable removal.
        if !symbol.is_non_terminal() {
            return symbol;
        }

        if let Some(replacement) = self.replacements.get(&symbol.index) {
            return Symbol::terminal(*replacement);
        }

        // Each removed variable with a smaller index shifts this one down.
        let mut adjusted_index = symbol.index;
        for (replaced_index, _) in self.replacements.iter() {
            if *replaced_index < symbol.index {
                adjusted_index -= 1;
            }
        }

        return Symbol::non_terminal(adjusted_index);
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::generate::grammars::VariableType;

    // End-to-end check of token extraction: strings, patterns, and `token(...)`
    // rules move into the lexical grammar; remaining non-terminal indices are
    // renumbered accordingly.
    #[test]
    fn test_extraction() {
        let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
            Variable::named(
                "rule_0",
                Rule::repeat(Rule::seq(vec![
                    Rule::string("a"),
                    Rule::pattern("b"),
                    Rule::choice(vec![
                        Rule::non_terminal(1),
                        Rule::non_terminal(2),
                        Rule::token(Rule::repeat(Rule::choice(vec![
                            Rule::string("c"),
                            Rule::string("d"),
                        ]))),
                    ]),
                ])),
            ),
            Variable::named("rule_1", Rule::pattern("e")),
            Variable::named("rule_2", Rule::pattern("b")),
            Variable::named(
                "rule_3",
                Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
            ),
        ]))
        .unwrap();

        assert_eq!(
            syntax_grammar.variables,
            vec![
                Variable::named(
                    "rule_0",
                    Rule::repeat(Rule::seq(vec![
                        // The string "a" was replaced by a symbol referencing the lexical grammar
                        Rule::terminal(0),
                        // The pattern "b" was replaced by a symbol referencing the lexical grammar
                        Rule::terminal(1),
                        Rule::choice(vec![
                            // The symbol referencing `rule_1` was replaced by a symbol referencing
                            // the lexical grammar.
                            Rule::terminal(3),
                            // The symbol referencing `rule_2` had its index decremented because
                            // `rule_1` was moved to the lexical grammar.
                            Rule::non_terminal(1),
                            // The rule wrapped in `token` was replaced by a symbol referencing
                            // the lexical grammar.
                            Rule::terminal(2),
                        ])
                    ]))
                ),
                // The pattern "e" was only used in one place: as the definition of `rule_1`,
                // so that rule was moved to the lexical grammar. The pattern "b" appeared in
                // two places, so it was not moved into the lexical grammar.
                Variable::named("rule_2", Rule::terminal(1)),
                Variable::named(
                    "rule_3",
                    Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,])
                ),
            ]
        );

        assert_eq!(
            lexical_grammar.variables,
            vec![
                Variable::anonymous("a", Rule::string("a")),
                Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
                Variable::auxiliary(
                    "rule_0_token2",
                    Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
                ),
                Variable::named("rule_1", Rule::pattern("e")),
            ]
        );
    }

    // A start rule that is itself a token still gets a syntax-grammar variable
    // wrapping the extracted terminal.
    #[test]
    fn test_start_rule_is_token() {
        let (syntax_grammar, lexical_grammar) =
            extract_tokens(build_grammar(vec![Variable::named(
                "rule_0",
                Rule::string("hello"),
            )]))
            .unwrap();

        assert_eq!(
            syntax_grammar.variables,
            vec![Variable::named("rule_0", Rule::terminal(0)),]
        );
        assert_eq!(
            lexical_grammar.variables,
            vec![Variable::anonymous("hello", Rule::string("hello")),]
        )
    }

    // Extra tokens that are bare rules become lexical separators; extras that
    // name another variable become syntax-grammar extra tokens.
    #[test]
    fn test_extracting_extra_tokens() {
        let mut grammar = build_grammar(vec![
            Variable::named("rule_0", Rule::string("x")),
            Variable::named("comment", Rule::pattern("//.*")),
        ]);
        grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)];

        let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
        assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]);
        assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
    }

    // External tokens may or may not correspond to an internal token; the
    // correspondence is recorded in `corresponding_internal_token`.
    #[test]
    fn test_extract_externals() {
        let mut grammar = build_grammar(vec![
            Variable::named(
                "rule_0",
                Rule::seq(vec![
                    Rule::external(0),
                    Rule::string("a"),
                    Rule::non_terminal(1),
                    Rule::non_terminal(2),
                ]),
            ),
            Variable::named("rule_1", Rule::string("b")),
            Variable::named("rule_2", Rule::string("c")),
        ]);
        grammar.external_tokens = vec![
            Variable::named("external_0", Rule::external(0)),
            Variable::anonymous("a", Rule::string("a")),
            Variable::named("rule_2", Rule::non_terminal(2)),
        ];

        let (syntax_grammar, _) = extract_tokens(grammar).unwrap();

        assert_eq!(
            syntax_grammar.external_tokens,
            vec![
                ExternalToken {
                    name: "external_0".to_string(),
                    kind: VariableType::Named,
                    corresponding_internal_token: None,
                },
                ExternalToken {
                    name: "a".to_string(),
                    kind: VariableType::Anonymous,
                    corresponding_internal_token: Some(Symbol::terminal(0)),
                },
                ExternalToken {
                    name: "rule_2".to_string(),
                    kind: VariableType::Named,
                    corresponding_internal_token: Some(Symbol::terminal(2)),
                },
            ]
        );
    }

    // An extra token must resolve to a token after extraction; a remaining
    // non-terminal is an error.
    #[test]
    fn test_error_on_non_terminal_symbol_extras() {
        let mut grammar = build_grammar(vec![
            Variable::named("rule_0", Rule::non_terminal(1)),
            Variable::named("rule_1", Rule::non_terminal(2)),
            Variable::named("rule_2", Rule::string("x")),
        ]);
        grammar.extra_tokens = vec![Rule::non_terminal(1)];

        match extract_tokens(grammar) {
            Err(Error(s)) => {
                assert_eq!(
                    s,
                    "Non-token symbol 'rule_1' cannot be used as an extra token"
                );
            }
            _ => {
                panic!("Expected an error but got no error");
            }
        }
    }

    // A name cannot be both an external token and a non-terminal rule.
    #[test]
    fn test_error_on_external_with_same_name_as_non_terminal() {
        let mut grammar = build_grammar(vec![
            Variable::named(
                "rule_0",
                Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
            Variable::named(
                "rule_1",
                Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]),
            ),
            Variable::named("rule_2", Rule::string("a")),
        ]);
        grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];

        match extract_tokens(grammar) {
            Err(Error(s)) => {
                assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
            }
            _ => {
                panic!("Expected an error but got no error");
            }
        }
    }

    // Helper: wrap a list of variables in an otherwise-empty interned grammar.
    fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
        InternedGrammar {
            variables,
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
        }
    }
}
|
||||
358
cli/src/generate/prepare_grammar/flatten_grammar.rs
Normal file
358
cli/src/generate/prepare_grammar/flatten_grammar.rs
Normal file
|
|
@ -0,0 +1,358 @@
|
|||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{
|
||||
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
|
||||
};
|
||||
use crate::generate::rules::Symbol;
|
||||
use crate::generate::rules::{Alias, Associativity, Rule};
|
||||
|
||||
/// Walks one choice-free rule tree and accumulates it into a flat `Production`,
/// tracking the precedence, associativity, and alias context that `Metadata`
/// nodes put in scope for the symbols beneath them.
struct RuleFlattener {
    // The production being built; steps are appended as symbols are visited.
    production: Production,
    // Innermost-last stacks of the metadata currently in scope.
    precedence_stack: Vec<i32>,
    associativity_stack: Vec<Associativity>,
    alias_stack: Vec<Alias>,
}
|
||||
|
||||
impl RuleFlattener {
    /// Create a flattener with an empty production and empty metadata stacks.
    fn new() -> Self {
        Self {
            production: Production {
                steps: Vec::new(),
                dynamic_precedence: 0,
            },
            precedence_stack: Vec::new(),
            associativity_stack: Vec::new(),
            alias_stack: Vec::new(),
        }
    }

    /// Consume the flattener, apply `rule`, and return the finished production.
    fn flatten(mut self, rule: Rule) -> Production {
        self.apply(rule, true);
        self.production
    }

    /// Recursively append `rule`'s symbols to the production. `at_end` is true
    /// when `rule` occupies the final position of the whole production; returns
    /// whether any step was pushed. The push/pop order of the stacks and the
    /// post-pop fix-up of the last step are order-sensitive — do not reorder.
    fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
        match rule {
            Rule::Seq(members) => {
                let mut result = false;
                // NOTE(review): underflows if `members` is empty — presumably
                // `Rule::seq` never produces an empty sequence; confirm upstream.
                let last_index = members.len() - 1;
                for (i, member) in members.into_iter().enumerate() {
                    // Only the final member of the final sequence is "at the end".
                    result |= self.apply(member, i == last_index && at_end);
                }
                result
            }
            Rule::Metadata { rule, params } => {
                // Push whichever metadata this node carries onto the stacks.
                let mut has_precedence = false;
                if let Some(precedence) = params.precedence {
                    has_precedence = true;
                    self.precedence_stack.push(precedence);
                }

                let mut has_associativity = false;
                if let Some(associativity) = params.associativity {
                    has_associativity = true;
                    self.associativity_stack.push(associativity);
                }

                let mut has_alias = false;
                if let Some(alias) = params.alias {
                    has_alias = true;
                    self.alias_stack.push(alias);
                }

                // The production keeps the dynamic precedence with the largest
                // absolute value seen anywhere in the tree.
                if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
                    self.production.dynamic_precedence = params.dynamic_precedence;
                }

                let did_push = self.apply(*rule, at_end);

                if has_precedence {
                    self.precedence_stack.pop();
                    // A non-final last step reverts to the enclosing precedence
                    // once this scope closes; a final step keeps its own.
                    if did_push && !at_end {
                        self.production.steps.last_mut().unwrap().precedence =
                            self.precedence_stack.last().cloned().unwrap_or(0);
                    }
                }

                if has_associativity {
                    self.associativity_stack.pop();
                    // Same reversion rule as precedence.
                    if did_push && !at_end {
                        self.production.steps.last_mut().unwrap().associativity =
                            self.associativity_stack.last().cloned();
                    }
                }

                if has_alias {
                    self.alias_stack.pop();
                }

                did_push
            }
            Rule::Symbol(symbol) => {
                // A symbol becomes one step, stamped with whatever metadata is
                // currently in scope (innermost wins).
                self.production.steps.push(ProductionStep {
                    symbol,
                    precedence: self.precedence_stack.last().cloned().unwrap_or(0),
                    associativity: self.associativity_stack.last().cloned(),
                    alias: self.alias_stack.last().cloned(),
                });
                true
            }
            // Blank, strings, etc. contribute no steps at this stage.
            _ => false,
        }
    }
}
|
||||
|
||||
fn extract_choices(rule: Rule) -> Vec<Rule> {
|
||||
match rule {
|
||||
Rule::Seq(elements) => {
|
||||
let mut result = vec![Rule::Blank];
|
||||
for element in elements {
|
||||
let extraction = extract_choices(element);
|
||||
let mut next_result = Vec::new();
|
||||
for entry in result {
|
||||
for extraction_entry in extraction.iter() {
|
||||
next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
|
||||
}
|
||||
}
|
||||
result = next_result;
|
||||
}
|
||||
result
|
||||
}
|
||||
Rule::Choice(elements) => {
|
||||
let mut result = Vec::new();
|
||||
for element in elements {
|
||||
for rule in extract_choices(element) {
|
||||
result.push(rule);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
Rule::Metadata { rule, params } => extract_choices(*rule)
|
||||
.into_iter()
|
||||
.map(|rule| Rule::Metadata {
|
||||
rule: Box::new(rule),
|
||||
params: params.clone(),
|
||||
})
|
||||
.collect(),
|
||||
_ => vec![rule],
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
|
||||
let mut productions = Vec::new();
|
||||
for rule in extract_choices(variable.rule) {
|
||||
let production = RuleFlattener::new().flatten(rule);
|
||||
if !productions.contains(&production) {
|
||||
productions.push(production);
|
||||
}
|
||||
}
|
||||
Ok(SyntaxVariable {
|
||||
name: variable.name,
|
||||
kind: variable.kind,
|
||||
productions,
|
||||
})
|
||||
}
|
||||
|
||||
/// Report whether `symbol` appears in any production step of any variable.
fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool {
    variables.iter().any(|variable| {
        variable.productions.iter().any(|production| {
            production.steps.iter().any(|step| step.symbol == symbol)
        })
    })
}
|
||||
|
||||
/// Flatten every variable of the extracted grammar into productions, then
/// reject any non-start rule that can match the empty string (an empty
/// production on a variable that is referenced somewhere else).
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
    let mut variables = Vec::new();
    for variable in grammar.variables {
        variables.push(flatten_variable(variable)?);
    }
    // An empty production is only legal for the start rule; a variable with an
    // empty production that is used by other rules makes the grammar invalid.
    for (i, variable) in variables.iter().enumerate() {
        for production in &variable.productions {
            if production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) {
                return Err(Error(format!(
                    "The rule `{}` matches the empty string.

Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
",
                    variable.name
                )));
            }
        }
    }
    // Everything except `variables` is carried over unchanged.
    Ok(SyntaxGrammar {
        extra_tokens: grammar.extra_tokens,
        expected_conflicts: grammar.expected_conflicts,
        variables_to_inline: grammar.variables_to_inline,
        external_tokens: grammar.external_tokens,
        word_token: grammar.word_token,
        variables,
    })
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::VariableType;
    use crate::generate::rules::Symbol;

    // Nested prec_left/prec_right with an inner choice: each alternative
    // becomes one production; precedence applies to the steps in its scope,
    // and the last step of a scope reverts to the enclosing precedence.
    #[test]
    fn test_flatten_grammar() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(1),
                Rule::prec_left(
                    101,
                    Rule::seq(vec![
                        Rule::non_terminal(2),
                        Rule::choice(vec![
                            Rule::prec_right(
                                102,
                                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                            ),
                            Rule::non_terminal(5),
                        ]),
                        Rule::non_terminal(6),
                    ]),
                ),
                Rule::non_terminal(7),
            ]),
        })
        .unwrap();

        assert_eq!(
            result.productions,
            vec![
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::non_terminal(1)),
                        ProductionStep::new(Symbol::non_terminal(2))
                            .with_prec(101, Some(Associativity::Left)),
                        ProductionStep::new(Symbol::non_terminal(3))
                            .with_prec(102, Some(Associativity::Right)),
                        // Step 4 ends the inner prec_right scope, so it reverts
                        // to the enclosing prec_left precedence.
                        ProductionStep::new(Symbol::non_terminal(4))
                            .with_prec(101, Some(Associativity::Left)),
                        ProductionStep::new(Symbol::non_terminal(6)),
                        ProductionStep::new(Symbol::non_terminal(7)),
                    ]
                },
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::non_terminal(1)),
                        ProductionStep::new(Symbol::non_terminal(2))
                            .with_prec(101, Some(Associativity::Left)),
                        ProductionStep::new(Symbol::non_terminal(5))
                            .with_prec(101, Some(Associativity::Left)),
                        ProductionStep::new(Symbol::non_terminal(6)),
                        ProductionStep::new(Symbol::non_terminal(7)),
                    ]
                },
            ]
        );
    }

    // Each production keeps the dynamic precedence with the largest absolute
    // value among the prec_dynamic scopes it passes through.
    #[test]
    fn test_flatten_grammar_with_maximum_dynamic_precedence() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(1),
                Rule::prec_dynamic(
                    101,
                    Rule::seq(vec![
                        Rule::non_terminal(2),
                        Rule::choice(vec![
                            Rule::prec_dynamic(
                                102,
                                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                            ),
                            Rule::non_terminal(5),
                        ]),
                        Rule::non_terminal(6),
                    ]),
                ),
                Rule::non_terminal(7),
            ]),
        })
        .unwrap();

        assert_eq!(
            result.productions,
            vec![
                Production {
                    dynamic_precedence: 102,
                    steps: vec![
                        ProductionStep::new(Symbol::non_terminal(1)),
                        ProductionStep::new(Symbol::non_terminal(2)),
                        ProductionStep::new(Symbol::non_terminal(3)),
                        ProductionStep::new(Symbol::non_terminal(4)),
                        ProductionStep::new(Symbol::non_terminal(6)),
                        ProductionStep::new(Symbol::non_terminal(7)),
                    ],
                },
                Production {
                    dynamic_precedence: 101,
                    steps: vec![
                        ProductionStep::new(Symbol::non_terminal(1)),
                        ProductionStep::new(Symbol::non_terminal(2)),
                        ProductionStep::new(Symbol::non_terminal(5)),
                        ProductionStep::new(Symbol::non_terminal(6)),
                        ProductionStep::new(Symbol::non_terminal(7)),
                    ],
                },
            ]
        );
    }

    // When a prec scope covers the entire production, the final step keeps the
    // scope's precedence instead of reverting.
    #[test]
    fn test_flatten_grammar_with_final_precedence() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::prec_left(
                101,
                Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
        })
        .unwrap();

        assert_eq!(
            result.productions,
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    ProductionStep::new(Symbol::non_terminal(1))
                        .with_prec(101, Some(Associativity::Left)),
                    ProductionStep::new(Symbol::non_terminal(2))
                        .with_prec(101, Some(Associativity::Left)),
                ]
            }]
        );

        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])),
        })
        .unwrap();

        assert_eq!(
            result.productions,
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![ProductionStep::new(Symbol::non_terminal(1))
                    .with_prec(101, Some(Associativity::Left)),]
            }]
        );
    }
}
|
||||
236
cli/src/generate/prepare_grammar/intern_symbols.rs
Normal file
236
cli/src/generate/prepare_grammar/intern_symbols.rs
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
use super::InternedGrammar;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
|
||||
use crate::generate::rules::{Rule, Symbol};
|
||||
|
||||
/// Resolve every `NamedSymbol` in the input grammar to a numeric `Symbol`,
/// producing an `InternedGrammar`. Names resolve to a non-terminal index when
/// they match a variable, otherwise to an external-token index; unknown names
/// are an error (except in `variables_to_inline`, where they are skipped).
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
    let interner = Interner { grammar };

    // The first variable is the start rule, which must not be hidden.
    // NOTE(review): indexes variables[0] directly — panics on an empty grammar;
    // presumably the parser guarantees at least one variable. Confirm upstream.
    if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
        return Err(Error("A grammar's start rule must be visible.".to_string()));
    }

    // Intern each variable's rule; its kind is derived from its name.
    let mut variables = Vec::with_capacity(grammar.variables.len());
    for variable in grammar.variables.iter() {
        variables.push(Variable {
            name: variable.name.clone(),
            kind: variable_type_for_name(&variable.name),
            rule: interner.intern_rule(&variable.rule)?,
        });
    }

    // External tokens given as named symbols keep their name and derived kind;
    // any other rule shape becomes an unnamed anonymous token.
    let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
    for external_token in grammar.external_tokens.iter() {
        let rule = interner.intern_rule(&external_token)?;
        let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
            (name.clone(), variable_type_for_name(&name))
        } else {
            (String::new(), VariableType::Anonymous)
        };
        external_tokens.push(Variable { name, kind, rule });
    }

    let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
    for extra_token in grammar.extra_tokens.iter() {
        extra_tokens.push(interner.intern_rule(extra_token)?);
    }

    // Conflict entries must name known symbols; unknown names are an error.
    let mut expected_conflicts = Vec::new();
    for conflict in grammar.expected_conflicts.iter() {
        let mut interned_conflict = Vec::with_capacity(conflict.len());
        for name in conflict {
            interned_conflict.push(
                interner
                    .intern_name(&name)
                    .ok_or_else(|| Error::undefined_symbol(name))?,
            );
        }
        expected_conflicts.push(interned_conflict);
    }

    // Unknown names in `variables_to_inline` are silently dropped rather than
    // reported as errors.
    let mut variables_to_inline = Vec::new();
    for name in grammar.variables_to_inline.iter() {
        if let Some(symbol) = interner.intern_name(&name) {
            variables_to_inline.push(symbol);
        }
    }

    // The word token, if declared, must resolve.
    let mut word_token = None;
    if let Some(name) = grammar.word_token.as_ref() {
        word_token = Some(
            interner
                .intern_name(&name)
                .ok_or_else(|| Error::undefined_symbol(&name))?,
        );
    }

    Ok(InternedGrammar {
        variables,
        external_tokens,
        extra_tokens,
        expected_conflicts,
        variables_to_inline,
        word_token,
    })
}
|
||||
|
||||
/// Helper that resolves rule names against one input grammar's variables and
/// external tokens.
struct Interner<'a> {
    grammar: &'a InputGrammar,
}
|
||||
|
||||
impl<'a> Interner<'a> {
|
||||
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
|
||||
match rule {
|
||||
Rule::Choice(elements) => {
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
}
|
||||
Ok(Rule::Choice(result))
|
||||
}
|
||||
Rule::Seq(elements) => {
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
}
|
||||
Ok(Rule::Seq(result))
|
||||
}
|
||||
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
|
||||
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
|
||||
rule: Box::new(self.intern_rule(rule)?),
|
||||
params: params.clone(),
|
||||
}),
|
||||
|
||||
Rule::NamedSymbol(name) => {
|
||||
if let Some(symbol) = self.intern_name(&name) {
|
||||
Ok(Rule::Symbol(symbol))
|
||||
} else {
|
||||
Err(Error::undefined_symbol(name))
|
||||
}
|
||||
}
|
||||
|
||||
_ => Ok(rule.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn intern_name(&self, symbol: &str) -> Option<Symbol> {
|
||||
for (i, variable) in self.grammar.variables.iter().enumerate() {
|
||||
if variable.name == symbol {
|
||||
return Some(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
|
||||
if let Rule::NamedSymbol(name) = external_token {
|
||||
if name == symbol {
|
||||
return Some(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
fn variable_type_for_name(name: &str) -> VariableType {
|
||||
if name.starts_with("_") {
|
||||
VariableType::Hidden
|
||||
} else {
|
||||
VariableType::Named
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): the name mentions repeat expansion, but this test actually
    // exercises basic name interning and hidden-rule detection.
    #[test]
    fn test_basic_repeat_expansion() {
        let grammar = intern_symbols(&build_grammar(vec![
            Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
            Variable::named("y", Rule::named("_z")),
            Variable::named("_z", Rule::string("a")),
        ]))
        .unwrap();

        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "x",
                    Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),])
                ),
                Variable::named("y", Rule::non_terminal(2)),
                // `_z` starts with an underscore, so it becomes hidden.
                Variable::hidden("_z", Rule::string("a")),
            ]
        );
    }

    #[test]
    fn test_interning_external_token_names() {
        // Variable `y` is both an internal and an external token.
        // Variable `z` is just an external token.
        let mut input_grammar = build_grammar(vec![
            Variable::named(
                "w",
                Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
            ),
            Variable::named("x", Rule::string("a")),
            Variable::named("y", Rule::string("b")),
        ]);
        input_grammar
            .external_tokens
            .extend(vec![Rule::named("y"), Rule::named("z")]);

        let grammar = intern_symbols(&input_grammar).unwrap();

        // Variable `y` is referred to by its internal index.
        // Variable `z` is referred to by its external index.
        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "w",
                    Rule::choice(vec![
                        Rule::non_terminal(1),
                        Rule::non_terminal(2),
                        Rule::external(1),
                    ])
                ),
                Variable::named("x", Rule::string("a")),
                Variable::named("y", Rule::string("b")),
            ]
        );

        // The external token for `y` refers back to its internal index.
        assert_eq!(
            grammar.external_tokens,
            vec![
                Variable::named("y", Rule::non_terminal(2)),
                Variable::named("z", Rule::external(1)),
            ]
        );
    }

    // A name that matches neither a variable nor an external token is an error.
    #[test]
    fn test_grammar_with_undefined_symbols() {
        let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))]));

        match result {
            Err(Error(message)) => assert_eq!(message, "Undefined symbol `y`"),
            _ => panic!("Expected an error but got none"),
        }
    }

    // Helper: wrap a list of variables in an otherwise-empty input grammar.
    fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
        InputGrammar {
            variables,
            name: "the_language".to_string(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
        }
    }
}
|
||||
57
cli/src/generate/prepare_grammar/mod.rs
Normal file
57
cli/src/generate/prepare_grammar/mod.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
mod expand_repeats;
|
||||
mod expand_tokens;
|
||||
mod extract_simple_aliases;
|
||||
mod extract_tokens;
|
||||
mod flatten_grammar;
|
||||
mod intern_symbols;
|
||||
mod process_inlines;
|
||||
|
||||
use self::expand_repeats::expand_repeats;
|
||||
pub(crate) use self::expand_tokens::expand_tokens;
|
||||
use self::extract_simple_aliases::extract_simple_aliases;
|
||||
use self::extract_tokens::extract_tokens;
|
||||
use self::flatten_grammar::flatten_grammar;
|
||||
use self::intern_symbols::intern_symbols;
|
||||
use self::process_inlines::process_inlines;
|
||||
use crate::error::Result;
|
||||
use crate::generate::grammars::{
|
||||
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable,
|
||||
};
|
||||
use crate::generate::rules::{AliasMap, Rule, Symbol};
|
||||
|
||||
/// A grammar mid-pipeline. The two type parameters track how far processing
/// has gone: `T` is the representation of an extra token and `U` that of an
/// external token (rules before extraction, symbols/tokens after).
pub(crate) struct IntermediateGrammar<T, U> {
    variables: Vec<Variable>,
    extra_tokens: Vec<T>,
    expected_conflicts: Vec<Vec<Symbol>>,
    external_tokens: Vec<U>,
    variables_to_inline: Vec<Symbol>,
    word_token: Option<Symbol>,
}
|
||||
|
||||
/// After `intern_symbols`: extras are still rules, externals still variables.
pub(crate) type InternedGrammar = IntermediateGrammar<Rule, Variable>;

/// After `extract_tokens`: extras are symbols, externals are resolved tokens.
pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;

/// The lexical half produced by `extract_tokens`: token rules plus the
/// separator rules that may appear between tokens.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ExtractedLexicalGrammar {
    pub variables: Vec<Variable>,
    pub separators: Vec<Rule>,
}
|
||||
|
||||
/// Run the full grammar-preparation pipeline, turning a parsed input grammar
/// into the syntax grammar, lexical grammar, inlined-production map, and
/// simple-alias map consumed by table generation.
pub(crate) fn prepare_grammar(
    input_grammar: &InputGrammar,
) -> Result<(
    SyntaxGrammar,
    LexicalGrammar,
    InlinedProductionMap,
    AliasMap,
)> {
    // 1. Resolve names to numeric symbols.
    let interned_grammar = intern_symbols(input_grammar)?;
    // 2. Split the grammar into syntactic and lexical halves.
    let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
    // 3. Rewrite repeats as auxiliary rules.
    let syntax_grammar = expand_repeats(syntax_grammar);
    // 4. Flatten every rule into productions.
    let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
    // 5. Expand token rules into NFA form.
    let lexical_grammar = expand_tokens(lexical_grammar)?;
    // 6. Pull out aliases that are applied uniformly.
    let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
    // 7. Precompute productions for inlined variables.
    let inlines = process_inlines(&syntax_grammar);
    Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
}
|
||||
479
cli/src/generate/prepare_grammar/process_inlines.rs
Normal file
479
cli/src/generate/prepare_grammar/process_inlines.rs
Normal file
|
|
@ -0,0 +1,479 @@
|
|||
use crate::generate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar};
|
||||
use hashbrown::HashMap;
|
||||
|
||||
/// Identifies one step of one production during inlining.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
    // A `None` value here means that the production itself was produced via inlining,
    // and is stored in the builder's `productions` vector, as opposed to being
    // stored in one of the grammar's variables.
    variable_index: Option<usize>,
    production_index: usize,
    step_index: usize,
}
|
||||
|
||||
/// Accumulates the productions created by inlining and, for each original
/// production step whose symbol is inlined, the indices of the productions
/// that replace it.
struct InlinedProductionMapBuilder {
    production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>,
    productions: Vec<Production>,
}
|
||||
|
||||
impl InlinedProductionMapBuilder {
|
||||
fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap {
|
||||
let mut step_ids_to_process = Vec::new();
|
||||
for (variable_index, variable) in grammar.variables.iter().enumerate() {
|
||||
for production_index in 0..variable.productions.len() {
|
||||
step_ids_to_process.push(ProductionStepId {
|
||||
variable_index: Some(variable_index),
|
||||
production_index,
|
||||
step_index: 0,
|
||||
});
|
||||
while !step_ids_to_process.is_empty() {
|
||||
let mut i = 0;
|
||||
while i < step_ids_to_process.len() {
|
||||
let step_id = step_ids_to_process[i];
|
||||
if let Some(step) = self.production_step_for_id(step_id, grammar) {
|
||||
if grammar.variables_to_inline.contains(&step.symbol) {
|
||||
let inlined_step_ids = self
|
||||
.inline_production_at_step(step_id, grammar)
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.map(|production_index| ProductionStepId {
|
||||
variable_index: None,
|
||||
production_index,
|
||||
step_index: step_id.step_index,
|
||||
});
|
||||
step_ids_to_process.splice(i..i + 1, inlined_step_ids);
|
||||
} else {
|
||||
step_ids_to_process[i] = ProductionStepId {
|
||||
variable_index: step_id.variable_index,
|
||||
production_index: step_id.production_index,
|
||||
step_index: step_id.step_index + 1,
|
||||
};
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
step_ids_to_process.remove(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let productions = self.productions;
|
||||
let production_indices_by_step_id = self.production_indices_by_step_id;
|
||||
let production_map = production_indices_by_step_id
|
||||
.into_iter()
|
||||
.map(|(step_id, production_indices)| {
|
||||
let production = if let Some(variable_index) = step_id.variable_index {
|
||||
&grammar.variables[variable_index].productions[step_id.production_index]
|
||||
} else {
|
||||
&productions[step_id.production_index]
|
||||
} as *const Production;
|
||||
((production, step_id.step_index as u32), production_indices)
|
||||
})
|
||||
.collect();
|
||||
|
||||
InlinedProductionMap {
|
||||
productions,
|
||||
production_map,
|
||||
}
|
||||
}
|
||||
|
||||
fn inline_production_at_step<'a>(
|
||||
&'a mut self,
|
||||
step_id: ProductionStepId,
|
||||
grammar: &'a SyntaxGrammar,
|
||||
) -> &'a Vec<usize> {
|
||||
// Build a list of productions produced by inlining rules.
|
||||
let mut i = 0;
|
||||
let step_index = step_id.step_index;
|
||||
let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
|
||||
while i < productions_to_add.len() {
|
||||
if let Some(step) = productions_to_add[i].steps.get(step_index) {
|
||||
let symbol = step.symbol.clone();
|
||||
if grammar.variables_to_inline.contains(&symbol) {
|
||||
// Remove the production from the vector, replacing it with a placeholder.
|
||||
let production = productions_to_add
|
||||
.splice(i..i + 1, [Production::default()].iter().cloned())
|
||||
.next()
|
||||
.unwrap();
|
||||
|
||||
// Replace the placeholder with the inlined productions.
|
||||
productions_to_add.splice(
|
||||
i..i + 1,
|
||||
grammar.variables[symbol.index].productions.iter().map(|p| {
|
||||
let mut production = production.clone();
|
||||
let removed_step = production
|
||||
.steps
|
||||
.splice(step_index..(step_index + 1), p.steps.iter().cloned())
|
||||
.next()
|
||||
.unwrap();
|
||||
let inserted_steps =
|
||||
&mut production.steps[step_index..(step_index + p.steps.len())];
|
||||
if let Some(alias) = removed_step.alias {
|
||||
for inserted_step in inserted_steps.iter_mut() {
|
||||
inserted_step.alias = Some(alias.clone());
|
||||
}
|
||||
}
|
||||
if let Some(last_inserted_step) = inserted_steps.last_mut() {
|
||||
if last_inserted_step.precedence == 0 {
|
||||
last_inserted_step.precedence = removed_step.precedence;
|
||||
}
|
||||
if last_inserted_step.associativity == None {
|
||||
last_inserted_step.associativity = removed_step.associativity;
|
||||
}
|
||||
}
|
||||
production
|
||||
}),
|
||||
);
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// Store all the computed productions.
|
||||
let result = productions_to_add
|
||||
.into_iter()
|
||||
.map(|production| {
|
||||
self.productions
|
||||
.iter()
|
||||
.position(|p| *p == production)
|
||||
.unwrap_or({
|
||||
self.productions.push(production);
|
||||
self.productions.len() - 1
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Cache these productions based on the original production step.
|
||||
self.production_indices_by_step_id
|
||||
.entry(step_id)
|
||||
.or_insert(result)
|
||||
}
|
||||
|
||||
fn production_for_id<'a>(
|
||||
&'a self,
|
||||
id: ProductionStepId,
|
||||
grammar: &'a SyntaxGrammar,
|
||||
) -> &'a Production {
|
||||
if let Some(variable_index) = id.variable_index {
|
||||
&grammar.variables[variable_index].productions[id.production_index]
|
||||
} else {
|
||||
&self.productions[id.production_index]
|
||||
}
|
||||
}
|
||||
|
||||
fn production_step_for_id<'a>(
|
||||
&'a self,
|
||||
id: ProductionStepId,
|
||||
grammar: &'a SyntaxGrammar,
|
||||
) -> Option<&'a ProductionStep> {
|
||||
self.production_for_id(id, grammar).steps.get(id.step_index)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap {
|
||||
InlinedProductionMapBuilder {
|
||||
productions: Vec::new(),
|
||||
production_indices_by_step_id: HashMap::new(),
|
||||
}
|
||||
.build(grammar)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{ProductionStep, SyntaxVariable, VariableType};
    use crate::generate::rules::{Associativity, Symbol};

    // Shorthand for a production with no dynamic precedence.
    fn production(steps: Vec<ProductionStep>) -> Production {
        Production {
            dynamic_precedence: 0,
            steps,
        }
    }

    // Shorthand for a named syntax variable.
    fn variable(name: &str, productions: Vec<Production>) -> SyntaxVariable {
        SyntaxVariable {
            name: name.to_string(),
            kind: VariableType::Named,
            productions,
        }
    }

    // Shorthand for a grammar with no conflicts, extras, external tokens,
    // or word token.
    fn grammar(variables: Vec<SyntaxVariable>, variables_to_inline: Vec<Symbol>) -> SyntaxGrammar {
        SyntaxGrammar {
            variables,
            variables_to_inline,
            expected_conflicts: Vec::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        }
    }

    #[test]
    fn test_basic_inlining() {
        let grammar = grammar(
            vec![
                variable(
                    "non-terminal-0",
                    vec![production(vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::non_terminal(1)), // inlined
                        ProductionStep::new(Symbol::terminal(11)),
                    ])],
                ),
                variable(
                    "non-terminal-1",
                    vec![
                        production(vec![
                            ProductionStep::new(Symbol::terminal(12)),
                            ProductionStep::new(Symbol::terminal(13)),
                        ]),
                        production(vec![ProductionStep::new(Symbol::terminal(14))]),
                    ],
                ),
            ],
            vec![Symbol::non_terminal(1)],
        );
        let inline_map = process_inlines(&grammar);

        // Nothing to inline at step 0.
        assert!(inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .is_none());

        // Inlining variable 1 yields one production per alternative of
        // variable 1.
        assert_eq!(
            inline_map
                .inlined_productions(&grammar.variables[0].productions[0], 1)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![
                production(vec![
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::terminal(12)),
                    ProductionStep::new(Symbol::terminal(13)),
                    ProductionStep::new(Symbol::terminal(11)),
                ]),
                production(vec![
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::terminal(14)),
                    ProductionStep::new(Symbol::terminal(11)),
                ]),
            ]
        );
    }

    #[test]
    fn test_nested_inlining() {
        let grammar = grammar(
            vec![
                variable(
                    "non-terminal-0",
                    vec![production(vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::non_terminal(1)), // inlined
                        ProductionStep::new(Symbol::terminal(11)),
                        ProductionStep::new(Symbol::non_terminal(2)), // inlined
                        ProductionStep::new(Symbol::terminal(12)),
                    ])],
                ),
                variable(
                    "non-terminal-1",
                    vec![
                        production(vec![ProductionStep::new(Symbol::terminal(13))]),
                        production(vec![
                            ProductionStep::new(Symbol::non_terminal(3)), // inlined
                            ProductionStep::new(Symbol::terminal(14)),
                        ]),
                    ],
                ),
                variable(
                    "non-terminal-2",
                    vec![production(vec![ProductionStep::new(Symbol::terminal(15))])],
                ),
                variable(
                    "non-terminal-3",
                    vec![production(vec![ProductionStep::new(Symbol::terminal(16))])],
                ),
            ],
            vec![
                Symbol::non_terminal(1),
                Symbol::non_terminal(2),
                Symbol::non_terminal(3),
            ],
        );
        let inline_map = process_inlines(&grammar);

        // Inlining variable 1 at step 1 also expands the nested variable 3,
        // but leaves the later inlinable step (variable 2) untouched.
        let productions: Vec<&Production> = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 1)
            .unwrap()
            .collect();

        assert_eq!(
            productions.iter().cloned().cloned().collect::<Vec<_>>(),
            vec![
                production(vec![
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::terminal(13)),
                    ProductionStep::new(Symbol::terminal(11)),
                    ProductionStep::new(Symbol::non_terminal(2)),
                    ProductionStep::new(Symbol::terminal(12)),
                ]),
                production(vec![
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::terminal(16)),
                    ProductionStep::new(Symbol::terminal(14)),
                    ProductionStep::new(Symbol::terminal(11)),
                    ProductionStep::new(Symbol::non_terminal(2)),
                    ProductionStep::new(Symbol::terminal(12)),
                ]),
            ]
        );

        // An already-inlined production can itself be inlined further.
        assert_eq!(
            inline_map
                .inlined_productions(productions[0], 3)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![production(vec![
                ProductionStep::new(Symbol::terminal(10)),
                ProductionStep::new(Symbol::terminal(13)),
                ProductionStep::new(Symbol::terminal(11)),
                ProductionStep::new(Symbol::terminal(15)),
                ProductionStep::new(Symbol::terminal(12)),
            ])]
        );
    }

    #[test]
    fn test_inlining_with_precedence_and_alias() {
        let grammar = grammar(
            vec![
                variable(
                    "non-terminal-0",
                    vec![production(vec![
                        // inlined
                        ProductionStep::new(Symbol::non_terminal(1))
                            .with_prec(1, Some(Associativity::Left)),
                        ProductionStep::new(Symbol::terminal(10)),
                        // inlined
                        ProductionStep::new(Symbol::non_terminal(2))
                            .with_alias("outer_alias", true),
                    ])],
                ),
                variable(
                    "non-terminal-1",
                    vec![production(vec![
                        ProductionStep::new(Symbol::terminal(11))
                            .with_prec(2, None)
                            .with_alias("inner_alias", true),
                        ProductionStep::new(Symbol::terminal(12)),
                    ])],
                ),
                variable(
                    "non-terminal-2",
                    vec![production(vec![ProductionStep::new(Symbol::terminal(13))])],
                ),
            ],
            vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
        );

        let inline_map = process_inlines(&grammar);

        let productions: Vec<_> = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .unwrap()
            .collect();

        assert_eq!(
            productions.iter().cloned().cloned().collect::<Vec<_>>(),
            vec![production(vec![
                // The first step in the inlined production retains its
                // precedence and alias.
                ProductionStep::new(Symbol::terminal(11))
                    .with_prec(2, None)
                    .with_alias("inner_alias", true),
                // The final step of the inlined production inherits the
                // precedence of the inlined step.
                ProductionStep::new(Symbol::terminal(12))
                    .with_prec(1, Some(Associativity::Left)),
                ProductionStep::new(Symbol::terminal(10)),
                ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true),
            ])],
        );

        assert_eq!(
            inline_map
                .inlined_productions(productions[0], 3)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![production(vec![
                ProductionStep::new(Symbol::terminal(11))
                    .with_prec(2, None)
                    .with_alias("inner_alias", true),
                ProductionStep::new(Symbol::terminal(12))
                    .with_prec(1, Some(Associativity::Left)),
                ProductionStep::new(Symbol::terminal(10)),
                // All steps of the inlined production inherit their alias
                // from the inlined step.
                ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true),
            ])],
        );
    }
}
|
||||
1044
cli/src/generate/render.rs
Normal file
1044
cli/src/generate/render.rs
Normal file
File diff suppressed because it is too large
Load diff
231
cli/src/generate/rules.rs
Normal file
231
cli/src/generate/rules.rs
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
use hashbrown::HashMap;
|
||||
|
||||
// The kind of a grammar symbol. Variant order matters: the derived
// `PartialOrd`/`Ord` implementations order symbols by this declaration
// order (External < End < Terminal < NonTerminal).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
    External,
    End,
    Terminal,
    NonTerminal,
}

// Operator associativity used when resolving precedence conflicts.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum Associativity {
    Left,
    Right,
}

// An alternate name under which a node appears in the syntax tree.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Alias {
    pub value: String,
    pub is_named: bool,
}

pub(crate) type AliasMap = HashMap<Symbol, Alias>;

// Metadata attached to a rule via `Rule::Metadata`: precedence,
// associativity, tokenization flags, and an optional alias. All fields
// start at their `Default` values and are filled in by the builder
// functions on `Rule` (see `add_metadata`).
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub(crate) struct MetadataParams {
    pub precedence: Option<i32>,
    pub dynamic_precedence: i32,
    pub associativity: Option<Associativity>,
    pub is_token: bool,
    pub is_string: bool,
    pub is_active: bool,
    pub is_main_token: bool,
    pub alias: Option<Alias>,
}

// A reference to a grammar symbol: its kind plus an index into the
// corresponding table. Field order matters for the derived ordering
// (kind first, then index).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Symbol {
    pub kind: SymbolType,
    pub index: usize,
}

// The grammar-rule AST produced while reading a grammar definition.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Rule {
    Blank,
    String(String),
    Pattern(String),
    NamedSymbol(String),
    Symbol(Symbol),
    Choice(Vec<Rule>),
    Metadata {
        params: MetadataParams,
        rule: Box<Rule>,
    },
    Repeat(Box<Rule>),
    Seq(Vec<Rule>),
}
|
||||
|
||||
impl Rule {
|
||||
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
params.alias = Some(Alias { is_named, value });
|
||||
})
|
||||
}
|
||||
|
||||
pub fn token(content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.is_token = true;
|
||||
})
|
||||
}
|
||||
|
||||
pub fn immediate_token(content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.is_token = true;
|
||||
params.is_main_token = true;
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prec(value: i32, content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.precedence = Some(value);
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prec_left(value: i32, content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.associativity = Some(Associativity::Left);
|
||||
params.precedence = Some(value);
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prec_right(value: i32, content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.associativity = Some(Associativity::Right);
|
||||
params.precedence = Some(value);
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prec_dynamic(value: i32, content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.dynamic_precedence = value;
|
||||
})
|
||||
}
|
||||
|
||||
pub fn repeat(rule: Rule) -> Self {
|
||||
Rule::Repeat(Box::new(rule))
|
||||
}
|
||||
|
||||
pub fn choice(rules: Vec<Rule>) -> Self {
|
||||
let mut elements = Vec::with_capacity(rules.len());
|
||||
for rule in rules {
|
||||
choice_helper(&mut elements, rule);
|
||||
}
|
||||
Rule::Choice(elements)
|
||||
}
|
||||
|
||||
pub fn seq(rules: Vec<Rule>) -> Self {
|
||||
Rule::Seq(rules)
|
||||
}
|
||||
}
|
||||
|
||||
// Test-only shorthand constructors for building rules concisely.
#[cfg(test)]
impl Rule {
    pub fn terminal(index: usize) -> Self {
        Symbol::terminal(index).into()
    }

    pub fn non_terminal(index: usize) -> Self {
        Symbol::non_terminal(index).into()
    }

    pub fn external(index: usize) -> Self {
        Symbol::external(index).into()
    }

    pub fn named(name: &'static str) -> Self {
        Rule::NamedSymbol(name.to_string())
    }

    pub fn string(value: &'static str) -> Self {
        Rule::String(value.to_string())
    }

    pub fn pattern(value: &'static str) -> Self {
        Rule::Pattern(value.to_string())
    }
}
|
||||
|
||||
impl Symbol {
|
||||
pub fn is_terminal(&self) -> bool {
|
||||
self.kind == SymbolType::Terminal
|
||||
}
|
||||
|
||||
pub fn is_non_terminal(&self) -> bool {
|
||||
self.kind == SymbolType::NonTerminal
|
||||
}
|
||||
|
||||
pub fn is_external(&self) -> bool {
|
||||
self.kind == SymbolType::External
|
||||
}
|
||||
|
||||
pub fn is_eof(&self) -> bool {
|
||||
self.kind == SymbolType::End
|
||||
}
|
||||
|
||||
pub fn non_terminal(index: usize) -> Self {
|
||||
Symbol {
|
||||
kind: SymbolType::NonTerminal,
|
||||
index,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn terminal(index: usize) -> Self {
|
||||
Symbol {
|
||||
kind: SymbolType::Terminal,
|
||||
index,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn external(index: usize) -> Self {
|
||||
Symbol {
|
||||
kind: SymbolType::External,
|
||||
index,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn end() -> Self {
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allow a bare `Symbol` to be used wherever a `Rule` is expected.
impl From<Symbol> for Rule {
    fn from(symbol: Symbol) -> Self {
        Rule::Symbol(symbol)
    }
}
|
||||
|
||||
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
|
||||
match input {
|
||||
Rule::Metadata { rule, mut params } => {
|
||||
f(&mut params);
|
||||
Rule::Metadata { rule, params }
|
||||
}
|
||||
_ => {
|
||||
let mut params = MetadataParams::default();
|
||||
f(&mut params);
|
||||
Rule::Metadata {
|
||||
rule: Box::new(input),
|
||||
params,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
|
||||
match rule {
|
||||
Rule::Choice(elements) => {
|
||||
for element in elements {
|
||||
choice_helper(result, element);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if !result.contains(&rule) {
|
||||
result.push(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
140
cli/src/generate/tables.rs
Normal file
140
cli/src/generate/tables.rs
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, Associativity, Symbol};
|
||||
use hashbrown::HashMap;
|
||||
|
||||
// Indices into the parse table's alias sequences, parse states, and lex
// states respectively.
pub(crate) type AliasSequenceId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;

// One action the parser can take for a lookahead symbol.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ParseAction {
    Accept,
    Shift {
        state: ParseStateId,
        is_repetition: bool,
    },
    ShiftExtra,
    Recover,
    Reduce {
        symbol: Symbol,
        child_count: usize,
        precedence: i32,
        dynamic_precedence: i32,
        associativity: Option<Associativity>,
        alias_sequence_id: AliasSequenceId,
    },
}

// The actions associated with one lookahead symbol in a parse state.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseTableEntry {
    pub actions: Vec<ParseAction>,
    // Entries start out reusable (see `ParseTableEntry::new`) and are
    // marked non-reusable elsewhere — presumably for incremental-parsing
    // token reuse; confirm against the table builder.
    pub reusable: bool,
}

// One LR parse state: terminal actions, nonterminal gotos, the lex state to
// use, and a signature of its unfinished items.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseState {
    pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
    pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
    pub lex_state_id: usize,
    pub unfinished_item_signature: u64,
}

// The complete generated parse table.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
    pub states: Vec<ParseState>,
    pub symbols: Vec<Symbol>,
    pub alias_sequences: Vec<Vec<Option<Alias>>>,
    pub max_aliased_production_length: usize,
}

// A lexer transition: the next lex state (if any) and whether the
// transition stays inside the main token.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct AdvanceAction {
    pub state: Option<LexStateId>,
    pub in_main_token: bool,
}

// One lexer state: character-set transitions plus the symbol accepted when
// no transition applies.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct LexState {
    pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
    pub accept_action: Option<Symbol>,
}

// The complete generated lex table.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexTable {
    pub states: Vec<LexState>,
}
|
||||
|
||||
impl ParseTableEntry {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
reusable: true,
|
||||
actions: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LexTable {
|
||||
fn default() -> Self {
|
||||
LexTable { states: Vec::new() }
|
||||
}
|
||||
}
|
||||
|
||||
impl ParseState {
|
||||
pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
|
||||
self.terminal_entries
|
||||
.iter()
|
||||
.flat_map(|(_, entry)| {
|
||||
entry.actions.iter().filter_map(|action| match action {
|
||||
ParseAction::Shift { state, .. } => Some(*state),
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.chain(self.nonterminal_entries.iter().map(|(_, state)| *state))
|
||||
}
|
||||
|
||||
pub fn update_referenced_states<F>(&mut self, mut f: F)
|
||||
where
|
||||
F: FnMut(usize, &ParseState) -> usize,
|
||||
{
|
||||
let mut updates = Vec::new();
|
||||
for (symbol, entry) in &self.terminal_entries {
|
||||
for (i, action) in entry.actions.iter().enumerate() {
|
||||
if let ParseAction::Shift { state, .. } = action {
|
||||
let result = f(*state, self);
|
||||
if result != *state {
|
||||
updates.push((*symbol, i, result));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (symbol, other_state) in &self.nonterminal_entries {
|
||||
let result = f(*other_state, self);
|
||||
if result != *other_state {
|
||||
updates.push((*symbol, 0, result));
|
||||
}
|
||||
}
|
||||
for (symbol, action_index, new_state) in updates {
|
||||
if symbol.is_non_terminal() {
|
||||
self.nonterminal_entries.insert(symbol, new_state);
|
||||
} else {
|
||||
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
|
||||
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
|
||||
entry.actions[action_index] = ParseAction::Shift {
|
||||
state: new_state,
|
||||
is_repetition,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ParseAction {
|
||||
pub fn precedence(&self) -> i32 {
|
||||
if let ParseAction::Reduce { precedence, .. } = self {
|
||||
*precedence
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
28
cli/src/generate/templates/binding.cc
Normal file
28
cli/src/generate/templates/binding.cc
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// Node.js binding template for a generated parser. Every occurrence of
// PARSER_NAME is replaced with the actual language name when the template
// is instantiated.
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"

using namespace v8;

// Entry point of the generated parser (defined in src/parser.c).
extern "C" TSLanguage * tree_sitter_PARSER_NAME();

namespace {

// No-op constructor: instances exist only to carry the language pointer in
// an internal field.
NAN_METHOD(New) {}

void Init(Handle<Object> exports, Handle<Object> module) {
  Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
  tpl->SetClassName(Nan::New("Language").ToLocalChecked());
  tpl->InstanceTemplate()->SetInternalFieldCount(1);

  // Create a single instance and stash the TSLanguage pointer in its
  // internal field, where the tree-sitter runtime can retrieve it.
  Local<Function> constructor = tpl->GetFunction();
  Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
  Nan::SetInternalFieldPointer(instance, 0, tree_sitter_PARSER_NAME());

  // Export the instance itself (not a constructor) as the module.
  instance->Set(Nan::New("name").ToLocalChecked(), Nan::New("PARSER_NAME").ToLocalChecked());
  module->Set(Nan::New("exports").ToLocalChecked(), instance);
}

NODE_MODULE(tree_sitter_PARSER_NAME_binding, Init)

}  // namespace
|
||||
18
cli/src/generate/templates/binding.gyp
Normal file
18
cli/src/generate/templates/binding.gyp
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# node-gyp build configuration template for a generated parser's Node
# bindings. PARSER_NAME is replaced with the language name when the
# template is instantiated.
{
  "targets": [
    {
      "target_name": "tree_sitter_PARSER_NAME_binding",
      "include_dirs": [
        "<!(node -e \"require('nan')\")",
        "src"
      ],
      "sources": [
        "src/parser.c",
        "src/binding.cc"
      ],
      "cflags_c": [
        "-std=c99",
      ]
    }
  ]
}
|
||||
9
cli/src/generate/templates/index.js
Normal file
9
cli/src/generate/templates/index.js
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
// Load the compiled native binding, preferring the Release build and
// falling back to the Debug build. If both fail, re-throw the Release
// error, since that is the build users normally have.
let binding;
try {
  binding = require("./build/Release/tree_sitter_PARSER_NAME_binding");
} catch (error) {
  try {
    binding = require("./build/Debug/tree_sitter_PARSER_NAME_binding");
  } catch (_) {
    throw error;
  }
}
module.exports = binding;
|
||||
11
cli/src/lib.rs
Normal file
11
cli/src/lib.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
// Public modules that make up the tree-sitter CLI library.
pub mod error;
pub mod generate;
pub mod loader;
pub mod logger;
pub mod parse;
pub mod properties;
pub mod test;
pub mod util;

// Integration tests, compiled only under `cargo test`.
#[cfg(test)]
mod tests;
|
||||
321
cli/src/loader.rs
Normal file
321
cli/src/loader.rs
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
use libloading::{Library, Symbol};
use regex::{Regex, RegexBuilder};
use serde_derive::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::io;
use std::mem;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::SystemTime;
use tree_sitter::{Language, PropertySheet};

// File extension for compiled parser libraries on this platform.
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";

#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";

// Target triple baked in at build time, so parsers are compiled for the
// same target as the CLI itself.
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");

// A discovered `tree-sitter-*` repository. `language` is `None` until the
// parser is compiled and loaded on first use.
struct LanguageRepo {
    name: String,
    path: PathBuf,
    language: Option<Language>,
    configurations: Vec<LanguageConfiguration>,
}

// One entry of the `tree-sitter` array in a repository's package.json.
// Underscore-prefixed fields are parsed but not yet read anywhere visible
// here.
pub struct LanguageConfiguration {
    _name: String,
    _content_regex: Option<Regex>,
    _first_line_regex: Option<Regex>,
    file_types: Vec<String>,
    // `Err(path)` until the property sheet is loaded; see
    // `find_language_at_path`, which stores the path lazily.
    _highlight_property_sheet: Option<Result<PropertySheet, PathBuf>>,
}

// Finds, compiles, and loads tree-sitter parsers from source repositories.
pub struct Loader {
    parser_lib_path: PathBuf,
    language_repos: Vec<LanguageRepo>,
    // Maps a file name or extension to (repo index, configuration index)
    // pairs.
    language_configuration_ids_by_file_type: HashMap<String, Vec<(usize, usize)>>,
}

// NOTE(review): `Language` contains a raw pointer, which prevents `Loader`
// from being `Send`/`Sync` automatically. These impls assert that the
// pointed-to parser tables are immutable and live for the whole process
// (the library handle is deliberately leaked in
// `load_language_from_sources`) — confirm against the tree_sitter crate's
// guarantees.
unsafe impl Send for Loader {}
unsafe impl Sync for Loader {}
|
||||
|
||||
impl Loader {
|
||||
pub fn new(parser_lib_path: PathBuf) -> Self {
|
||||
Loader {
|
||||
parser_lib_path,
|
||||
language_repos: Vec::new(),
|
||||
language_configuration_ids_by_file_type: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_all_languages(&mut self, parser_src_paths: &Vec<PathBuf>) -> io::Result<()> {
|
||||
for parser_container_dir in parser_src_paths.iter() {
|
||||
for entry in fs::read_dir(parser_container_dir)? {
|
||||
let entry = entry?;
|
||||
if let Some(parser_dir_name) = entry.file_name().to_str() {
|
||||
if parser_dir_name.starts_with("tree-sitter-") {
|
||||
self.find_language_at_path(&parser_container_dir.join(parser_dir_name))
|
||||
.ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn language_at_path(&mut self, path: &Path) -> io::Result<Option<Language>> {
|
||||
if let Ok(id) = self.find_language_at_path(path) {
|
||||
Ok(Some(self.language_configuration_for_id(id)?.0))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn language_configuration_for_file_name(
|
||||
&mut self,
|
||||
path: &Path,
|
||||
) -> io::Result<Option<(Language, &LanguageConfiguration)>> {
|
||||
let ids = path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
|
||||
.or_else(|| {
|
||||
path.extension()
|
||||
.and_then(|extension| extension.to_str())
|
||||
.and_then(|extension| {
|
||||
self.language_configuration_ids_by_file_type.get(extension)
|
||||
})
|
||||
});
|
||||
if let Some(ids) = ids {
|
||||
// TODO use `content-regex` to pick one
|
||||
for (repo_id, configuration_id) in ids.iter().cloned() {
|
||||
let (language, configurations) = self.language_configuration_for_id(repo_id)?;
|
||||
return Ok(Some((language, &configurations[configuration_id])));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn language_configuration_for_id(
|
||||
&mut self,
|
||||
id: usize,
|
||||
) -> io::Result<(Language, &Vec<LanguageConfiguration>)> {
|
||||
let repo = &self.language_repos[id];
|
||||
let language = if let Some(language) = repo.language {
|
||||
language
|
||||
} else {
|
||||
let src_path = repo.path.join("src");
|
||||
let language = self.load_language_at_path(&repo.name, &src_path, &src_path)?;
|
||||
self.language_repos[id].language = Some(language);
|
||||
language
|
||||
};
|
||||
Ok((language, &self.language_repos[id].configurations))
|
||||
}
|
||||
|
||||
pub fn load_language_at_path(
|
||||
&self,
|
||||
name: &str,
|
||||
src_path: &Path,
|
||||
header_path: &Path,
|
||||
) -> io::Result<Language> {
|
||||
let parser_path = src_path.join("parser.c");
|
||||
|
||||
let scanner_path;
|
||||
let scanner_c_path = src_path.join("scanner.c");
|
||||
if scanner_c_path.exists() {
|
||||
scanner_path = Some(scanner_c_path);
|
||||
} else {
|
||||
let scanner_cc_path = src_path.join("scanner.cc");
|
||||
if scanner_cc_path.exists() {
|
||||
scanner_path = Some(scanner_cc_path);
|
||||
} else {
|
||||
scanner_path = None;
|
||||
}
|
||||
}
|
||||
|
||||
self.load_language_from_sources(name, &header_path, &parser_path, &scanner_path)
|
||||
}
|
||||
|
||||
pub fn load_language_from_sources(
|
||||
&self,
|
||||
name: &str,
|
||||
header_path: &Path,
|
||||
parser_path: &Path,
|
||||
scanner_path: &Option<PathBuf>,
|
||||
) -> io::Result<Language> {
|
||||
let mut library_path = self.parser_lib_path.join(name);
|
||||
library_path.set_extension(DYLIB_EXTENSION);
|
||||
|
||||
if needs_recompile(&library_path, &parser_path, &scanner_path)? {
|
||||
let mut config = cc::Build::new();
|
||||
config
|
||||
.cpp(true)
|
||||
.opt_level(2)
|
||||
.cargo_metadata(false)
|
||||
.target(BUILD_TARGET)
|
||||
.host(BUILD_TARGET);
|
||||
let compiler = config.get_compiler();
|
||||
let mut command = Command::new(compiler.path());
|
||||
for (key, value) in compiler.env() {
|
||||
command.env(key, value);
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
command
|
||||
.args(&["/nologo", "/LD", "/I"])
|
||||
.arg(header_path)
|
||||
.arg("/Od")
|
||||
.arg(parser_path);
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
command.arg(scanner_path);
|
||||
}
|
||||
command
|
||||
.arg("/link")
|
||||
.arg(format!("/out:{}", library_path.to_str().unwrap()));
|
||||
} else {
|
||||
command
|
||||
.arg("-shared")
|
||||
.arg("-fPIC")
|
||||
.arg("-I")
|
||||
.arg(header_path)
|
||||
.arg("-o")
|
||||
.arg(&library_path)
|
||||
.arg("-xc")
|
||||
.arg(parser_path);
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
if scanner_path.extension() == Some("c".as_ref()) {
|
||||
command.arg("-xc").arg("-std=c99").arg(scanner_path);
|
||||
} else {
|
||||
command.arg("-xc++").arg(scanner_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let output = command.output()?;
|
||||
if !output.status.success() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!(
|
||||
"Parser compilation failed.\nStdout: {}\nStderr: {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
)
|
||||
.as_str(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let library = Library::new(library_path)?;
|
||||
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
|
||||
let language = unsafe {
|
||||
let language_fn: Symbol<unsafe extern "C" fn() -> Language> =
|
||||
library.get(language_fn_name.as_bytes())?;
|
||||
language_fn()
|
||||
};
|
||||
mem::forget(library);
|
||||
Ok(language)
|
||||
}
|
||||
|
||||
fn find_language_at_path<'a>(&'a mut self, parser_path: &Path) -> io::Result<usize> {
|
||||
#[derive(Deserialize)]
|
||||
struct LanguageConfigurationJSON {
|
||||
name: String,
|
||||
#[serde(rename = "file-types")]
|
||||
file_types: Option<Vec<String>>,
|
||||
#[serde(rename = "content-regex")]
|
||||
content_regex: Option<String>,
|
||||
#[serde(rename = "first-line-regex")]
|
||||
first_line_regex: Option<String>,
|
||||
highlights: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PackageJSON {
|
||||
name: String,
|
||||
#[serde(rename = "tree-sitter")]
|
||||
tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
|
||||
}
|
||||
|
||||
let package_json_contents = fs::read_to_string(&parser_path.join("package.json"))?;
|
||||
let package_json: PackageJSON = serde_json::from_str(&package_json_contents)?;
|
||||
let configurations = package_json
|
||||
.tree_sitter
|
||||
.map_or(Vec::new(), |configurations| {
|
||||
configurations
|
||||
.into_iter()
|
||||
.map(|conf| LanguageConfiguration {
|
||||
_name: conf.name,
|
||||
file_types: conf.file_types.unwrap_or(Vec::new()),
|
||||
_content_regex: conf
|
||||
.content_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
_first_line_regex: conf
|
||||
.first_line_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
_highlight_property_sheet: conf.highlights.map(|d| Err(d.into())),
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
for (i, configuration) in configurations.iter().enumerate() {
|
||||
for file_type in &configuration.file_types {
|
||||
self.language_configuration_ids_by_file_type
|
||||
.entry(file_type.to_string())
|
||||
.or_insert(Vec::new())
|
||||
.push((self.language_repos.len(), i));
|
||||
}
|
||||
}
|
||||
|
||||
self.language_repos.push(LanguageRepo {
|
||||
name: package_json
|
||||
.name
|
||||
.split_at("tree-sitter-".len())
|
||||
.1
|
||||
.to_string(),
|
||||
path: parser_path.to_owned(),
|
||||
language: None,
|
||||
configurations,
|
||||
});
|
||||
|
||||
Ok(self.language_repos.len() - 1)
|
||||
}
|
||||
}
|
||||
|
||||
fn needs_recompile(
|
||||
lib_path: &Path,
|
||||
parser_c_path: &Path,
|
||||
scanner_path: &Option<PathBuf>,
|
||||
) -> io::Result<bool> {
|
||||
if !lib_path.exists() {
|
||||
return Ok(true);
|
||||
}
|
||||
let lib_mtime = mtime(lib_path)?;
|
||||
if mtime(parser_c_path)? > lib_mtime {
|
||||
return Ok(true);
|
||||
}
|
||||
if let Some(scanner_path) = scanner_path {
|
||||
if mtime(scanner_path)? > lib_mtime {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
/// The last-modification time of the file at `path`.
fn mtime(path: &Path) -> io::Result<SystemTime> {
    let metadata = fs::metadata(path)?;
    metadata.modified()
}
|
||||
|
||||
/// Converts a grammar name like "embedded-template" into the form used in
/// its exported C symbol ("embedded_template") by replacing every '-'
/// with '_'.
fn replace_dashes_with_underscores(name: &str) -> String {
    // `str::replace` performs exactly the character-for-character
    // substitution the previous hand-rolled loop did, in one idiomatic call.
    name.replace('-', "_")
}
|
||||
29
cli/src/logger.rs
Normal file
29
cli/src/logger.rs
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
use log::{LevelFilter, Log, Metadata, Record};
|
||||
|
||||
/// A minimal `log::Log` implementation that writes records to stderr.
struct Logger {
    // Optional module-path filter; currently always `None` (see `init`),
    // kept for future use.
    pub filter: Option<String>,
}
|
||||
|
||||
impl Log for Logger {
|
||||
fn enabled(&self, _: &Metadata) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn log(&self, record: &Record) {
|
||||
eprintln!(
|
||||
"[{}] {}",
|
||||
record
|
||||
.module_path()
|
||||
.unwrap_or_default()
|
||||
.trim_start_matches("rust_tree_sitter_cli::"),
|
||||
record.args()
|
||||
);
|
||||
}
|
||||
|
||||
fn flush(&self) {}
|
||||
}
|
||||
|
||||
pub fn init() {
|
||||
log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
|
||||
log::set_max_level(LevelFilter::Info);
|
||||
}
|
||||
145
cli/src/main.rs
Normal file
145
cli/src/main.rs
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
use clap::{App, AppSettings, Arg, SubCommand};
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::usize;
|
||||
use tree_sitter_cli::loader::Loader;
|
||||
use tree_sitter_cli::{error, generate, logger, parse, properties, test};
|
||||
|
||||
fn main() {
|
||||
if let Err(e) = run() {
|
||||
eprintln!("{}", e.0);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn run() -> error::Result<()> {
|
||||
let matches = App::new("tree-sitter")
|
||||
.version(concat!(
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
" (",
|
||||
env!("BUILD_SHA"),
|
||||
")"
|
||||
))
|
||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
||||
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
|
||||
.about("Generates and tests parsers")
|
||||
.subcommand(
|
||||
SubCommand::with_name("generate")
|
||||
.about("Generate a parser")
|
||||
.arg(Arg::with_name("grammar-path").index(1))
|
||||
.arg(Arg::with_name("log").long("log"))
|
||||
.arg(Arg::with_name("properties-only").long("properties"))
|
||||
.arg(
|
||||
Arg::with_name("state-ids-to-log")
|
||||
.long("log-state")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(Arg::with_name("no-minimize").long("no-minimize")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("parse")
|
||||
.about("Parse a file")
|
||||
.arg(
|
||||
Arg::with_name("path")
|
||||
.index(1)
|
||||
.multiple(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("time").long("time").short("t")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("test")
|
||||
.about("Run a parser's tests")
|
||||
.arg(
|
||||
Arg::with_name("filter")
|
||||
.long("filter")
|
||||
.short("f")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let home_dir = dirs::home_dir().unwrap();
|
||||
let current_dir = env::current_dir().unwrap();
|
||||
let config_dir = home_dir.join(".tree-sitter");
|
||||
|
||||
fs::create_dir_all(&config_dir).unwrap();
|
||||
let mut loader = Loader::new(config_dir);
|
||||
|
||||
if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
if matches.is_present("log") {
|
||||
logger::init();
|
||||
}
|
||||
|
||||
let grammar_path = matches.value_of("grammar-path");
|
||||
let minimize = !matches.is_present("no-minimize");
|
||||
let properties_only = matches.is_present("properties-only");
|
||||
let state_ids_to_log = matches
|
||||
.values_of("state-ids-to-log")
|
||||
.map_or(Vec::new(), |ids| {
|
||||
ids.filter_map(|id| usize::from_str_radix(id, 10).ok())
|
||||
.collect()
|
||||
});
|
||||
if !properties_only {
|
||||
generate::generate_parser_in_directory(
|
||||
¤t_dir,
|
||||
grammar_path,
|
||||
minimize,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
}
|
||||
properties::generate_property_sheets_in_directory(¤t_dir)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("test") {
|
||||
let debug = matches.is_present("debug");
|
||||
let debug_graph = matches.is_present("debug-graph");
|
||||
let filter = matches.value_of("filter");
|
||||
let corpus_path = current_dir.join("corpus");
|
||||
if let Some(language) = loader.language_at_path(¤t_dir)? {
|
||||
test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)?;
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("parse") {
|
||||
let debug = matches.is_present("debug");
|
||||
let debug_graph = matches.is_present("debug-graph");
|
||||
let quiet = matches.is_present("quiet");
|
||||
let time = matches.is_present("time");
|
||||
loader.find_all_languages(&vec![home_dir.join("github")])?;
|
||||
let paths = matches
|
||||
.values_of("path")
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap();
|
||||
for path in paths {
|
||||
let path = Path::new(path);
|
||||
let language =
|
||||
if let Some((l, _)) = loader.language_configuration_for_file_name(path)? {
|
||||
l
|
||||
} else if let Some(l) = loader.language_at_path(¤t_dir)? {
|
||||
l
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
return Ok(());
|
||||
};
|
||||
parse::parse_file_at_path(
|
||||
language,
|
||||
path,
|
||||
max_path_length,
|
||||
quiet,
|
||||
time,
|
||||
debug,
|
||||
debug_graph,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
141
cli/src/parse.rs
Normal file
141
cli/src/parse.rs
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
use super::error::Result;
|
||||
use super::util;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
use tree_sitter::{Language, LogType, Parser};
|
||||
|
||||
pub fn parse_file_at_path(
|
||||
language: Language,
|
||||
path: &Path,
|
||||
max_path_length: usize,
|
||||
quiet: bool,
|
||||
print_time: bool,
|
||||
debug: bool,
|
||||
debug_graph: bool,
|
||||
) -> Result<()> {
|
||||
let mut _log_session = None;
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language)?;
|
||||
let source_code = fs::read(path)?;
|
||||
|
||||
if debug_graph {
|
||||
_log_session = Some(util::log_graphs(&mut parser, "log.html")?);
|
||||
} else if debug {
|
||||
parser.set_logger(Some(Box::new(|log_type, message| {
|
||||
if log_type == LogType::Lex {
|
||||
io::stderr().write(b" ").unwrap();
|
||||
}
|
||||
write!(&mut io::stderr(), "{}\n", message).unwrap();
|
||||
})));
|
||||
}
|
||||
|
||||
let time = Instant::now();
|
||||
let tree = parser
|
||||
.parse_utf8(&mut |byte, _| &source_code[byte..], None)
|
||||
.expect("Incompatible language version");
|
||||
let duration = time.elapsed();
|
||||
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
|
||||
|
||||
let mut cursor = tree.walk();
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
if !quiet {
|
||||
let mut needs_newline = false;
|
||||
let mut indent_level = 0;
|
||||
let mut did_visit_children = false;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
let is_named = node.is_named();
|
||||
if did_visit_children {
|
||||
if is_named {
|
||||
stdout.write(b")")?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_next_sibling() {
|
||||
did_visit_children = false;
|
||||
} else if cursor.goto_parent() {
|
||||
did_visit_children = true;
|
||||
indent_level -= 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if is_named {
|
||||
if needs_newline {
|
||||
stdout.write(b"\n")?;
|
||||
}
|
||||
for _ in 0..indent_level {
|
||||
stdout.write(b" ")?;
|
||||
}
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
write!(
|
||||
&mut stdout,
|
||||
"({} [{}, {}] - [{}, {}]",
|
||||
node.kind(),
|
||||
start.row,
|
||||
start.column,
|
||||
end.row,
|
||||
end.column
|
||||
)?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_first_child() {
|
||||
did_visit_children = false;
|
||||
indent_level += 1;
|
||||
} else {
|
||||
did_visit_children = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
cursor.reset(tree.root_node());
|
||||
println!("");
|
||||
}
|
||||
|
||||
let mut first_error = None;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
if node.has_error() {
|
||||
if node.is_error() || node.is_missing() {
|
||||
first_error = Some(node);
|
||||
break;
|
||||
} else {
|
||||
cursor.goto_first_child();
|
||||
}
|
||||
} else if !cursor.goto_next_sibling() {
|
||||
if !cursor.goto_parent() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if first_error.is_some() || print_time {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"{:width$}\t{} ms",
|
||||
path.to_str().unwrap(),
|
||||
duration_ms,
|
||||
width = max_path_length
|
||||
)?;
|
||||
if let Some(node) = first_error {
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
write!(
|
||||
&mut stdout,
|
||||
"\t({} [{}, {}] - [{}, {}]",
|
||||
node.kind(),
|
||||
start.row,
|
||||
start.column,
|
||||
end.row,
|
||||
end.column
|
||||
)?;
|
||||
}
|
||||
write!(&mut stdout, "\n")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
991
cli/src/properties.rs
Normal file
991
cli/src/properties.rs
Normal file
|
|
@ -0,0 +1,991 @@
|
|||
use crate::error::{Error, Result};
|
||||
use log::info;
|
||||
use rsass;
|
||||
use rsass::sass::Value;
|
||||
use serde_derive::Serialize;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
||||
use std::fmt::{self, Write};
|
||||
use std::fs::{self, File};
|
||||
use std::io::BufWriter;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
|
||||
#[serde(untagged)]
|
||||
enum PropertyValue {
|
||||
String(String),
|
||||
Object(PropertySet),
|
||||
Array(Vec<PropertyValue>),
|
||||
}
|
||||
|
||||
type PropertySet = HashMap<String, PropertyValue>;
|
||||
type PropertySheetJSON = tree_sitter::PropertySheetJSON<PropertySet>;
|
||||
type StateId = usize;
|
||||
type PropertySetId = usize;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
struct SelectorStep {
|
||||
kind: String,
|
||||
is_named: bool,
|
||||
is_immediate: bool,
|
||||
child_index: Option<usize>,
|
||||
text_pattern: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
struct Selector(Vec<SelectorStep>);
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct Rule {
|
||||
selectors: Vec<Selector>,
|
||||
properties: PropertySet,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
struct Item {
|
||||
rule_id: u32,
|
||||
selector_id: u32,
|
||||
step_id: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
struct ItemSet(Vec<Item>);
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
struct SelectorMatch {
|
||||
specificity: u32,
|
||||
rule_id: u32,
|
||||
}
|
||||
|
||||
struct Builder {
|
||||
rules: Vec<Rule>,
|
||||
output: PropertySheetJSON,
|
||||
ids_by_item_set: HashMap<ItemSet, StateId>,
|
||||
item_set_queue: VecDeque<(ItemSet, StateId)>,
|
||||
}
|
||||
|
||||
impl ItemSet {
|
||||
fn new() -> Self {
|
||||
ItemSet(Vec::new())
|
||||
}
|
||||
|
||||
fn insert(&mut self, item: Item) {
|
||||
match self.0.binary_search(&item) {
|
||||
Err(i) => self.0.insert(i, item),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoIterator for &'a ItemSet {
|
||||
type Item = &'a Item;
|
||||
type IntoIter = std::slice::Iter<'a, Item>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.0.iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
fn new(rules: Vec<Rule>) -> Self {
|
||||
Builder {
|
||||
rules,
|
||||
output: PropertySheetJSON {
|
||||
states: Vec::new(),
|
||||
property_sets: Vec::new(),
|
||||
},
|
||||
ids_by_item_set: HashMap::new(),
|
||||
item_set_queue: VecDeque::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build(mut self) -> PropertySheetJSON {
|
||||
let mut start_item_set = ItemSet::new();
|
||||
for (i, rule) in self.rules.iter().enumerate() {
|
||||
for j in 0..rule.selectors.len() {
|
||||
start_item_set.insert(Item {
|
||||
rule_id: i as u32,
|
||||
selector_id: j as u32,
|
||||
step_id: 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
self.add_state(start_item_set);
|
||||
while let Some((item_set, state_id)) = self.item_set_queue.pop_front() {
|
||||
self.populate_state(item_set, state_id);
|
||||
}
|
||||
|
||||
self.remove_duplicate_states();
|
||||
|
||||
for (i, state) in self.output.states.iter_mut().enumerate() {
|
||||
state.id = Some(i);
|
||||
}
|
||||
|
||||
self.output
|
||||
}
|
||||
|
||||
fn add_state(&mut self, item_set: ItemSet) -> StateId {
|
||||
match self.ids_by_item_set.entry(item_set) {
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
Entry::Vacant(v) => {
|
||||
let state_id = self.output.states.len();
|
||||
self.output.states.push(PropertyStateJSON {
|
||||
id: None,
|
||||
transitions: Vec::new(),
|
||||
property_set_id: 0,
|
||||
default_next_state_id: 0,
|
||||
});
|
||||
self.item_set_queue.push_back((v.key().clone(), state_id));
|
||||
v.insert(state_id);
|
||||
state_id
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn add_property_set(&mut self, properties: PropertySet) -> PropertySetId {
|
||||
if let Some(index) = self
|
||||
.output
|
||||
.property_sets
|
||||
.iter()
|
||||
.position(|i| *i == properties)
|
||||
{
|
||||
index
|
||||
} else {
|
||||
self.output.property_sets.push(properties);
|
||||
self.output.property_sets.len() - 1
|
||||
}
|
||||
}
|
||||
|
||||
fn populate_state(&mut self, item_set: ItemSet, state_id: StateId) {
|
||||
let mut transition_map: HashSet<(PropertyTransitionJSON, u32)> = HashSet::new();
|
||||
let mut selector_matches = Vec::new();
|
||||
|
||||
// First, compute all of the possible state transition predicates for
|
||||
// this state, and all of the rules that are currently matching.
|
||||
for item in &item_set {
|
||||
let rule = &self.rules[item.rule_id as usize];
|
||||
let selector = &rule.selectors[item.selector_id as usize];
|
||||
let next_step = selector.0.get(item.step_id as usize);
|
||||
|
||||
// If this item has more elements remaining in its selector, then
|
||||
// add a state transition based on the next step.
|
||||
if let Some(step) = next_step {
|
||||
transition_map.insert((
|
||||
PropertyTransitionJSON {
|
||||
kind: step.kind.clone(),
|
||||
named: step.is_named,
|
||||
index: step.child_index,
|
||||
text: step.text_pattern.clone(),
|
||||
state_id: 0,
|
||||
},
|
||||
// Include the rule id so that it can be used when sorting transitions.
|
||||
item.rule_id,
|
||||
));
|
||||
}
|
||||
// If the item has matched its entire selector, then the item's
|
||||
// properties are applicable to this state.
|
||||
else {
|
||||
selector_matches.push(SelectorMatch {
|
||||
rule_id: item.rule_id,
|
||||
specificity: selector_specificity(selector),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// For eacy possible state transition, compute the set of items in that transition's
|
||||
// destination state.
|
||||
let mut transition_list: Vec<(PropertyTransitionJSON, u32)> = transition_map
|
||||
.into_iter()
|
||||
.map(|(mut transition, rule_id)| {
|
||||
let mut next_item_set = ItemSet::new();
|
||||
for item in &item_set {
|
||||
let rule = &self.rules[item.rule_id as usize];
|
||||
let selector = &rule.selectors[item.selector_id as usize];
|
||||
let next_step = selector.0.get(item.step_id as usize);
|
||||
|
||||
if let Some(step) = next_step {
|
||||
// If the next step of the item's selector satisfies this transition,
|
||||
// advance the item to the next part of its selector and add the
|
||||
// resulting item to this transition's destination state.
|
||||
if step_matches_transition(step, &transition) {
|
||||
next_item_set.insert(Item {
|
||||
rule_id: item.rule_id,
|
||||
selector_id: item.selector_id,
|
||||
step_id: item.step_id + 1,
|
||||
});
|
||||
}
|
||||
|
||||
// If the next step of the item is not an immediate child, then
|
||||
// include this item in this transition's destination state, because
|
||||
// the next step of the item might match a descendant node.
|
||||
if !step.is_immediate {
|
||||
next_item_set.insert(*item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
transition.state_id = self.add_state(next_item_set);
|
||||
(transition, rule_id)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Ensure that for a given node type, more specific transitions are tried
|
||||
// first, and in the event of a tie, transitions corresponding to later rules
|
||||
// in the cascade are tried first.
|
||||
transition_list.sort_by(|a, b| {
|
||||
let result = a.0.kind.cmp(&b.0.kind);
|
||||
if result != Ordering::Equal {
|
||||
return result;
|
||||
}
|
||||
let result = a.0.named.cmp(&b.0.named);
|
||||
if result != Ordering::Equal {
|
||||
return result;
|
||||
}
|
||||
let result = transition_specificity(&b.0).cmp(&transition_specificity(&a.0));
|
||||
if result != Ordering::Equal {
|
||||
return result;
|
||||
}
|
||||
b.1.cmp(&a.1)
|
||||
});
|
||||
|
||||
// Compute the merged properties that apply in the current state.
|
||||
// Sort the matching property sets by ascending specificity and by
|
||||
// their order in the sheet. This way, more specific selectors and later
|
||||
// rules will override less specific selectors and earlier rules.
|
||||
let mut properties = PropertySet::new();
|
||||
selector_matches.sort_unstable_by(|a, b| {
|
||||
let result = a.specificity.cmp(&b.specificity);
|
||||
if result != Ordering::Equal {
|
||||
return result;
|
||||
}
|
||||
a.rule_id.cmp(&b.rule_id)
|
||||
});
|
||||
selector_matches.dedup();
|
||||
for selector_match in selector_matches {
|
||||
let rule = &self.rules[selector_match.rule_id as usize];
|
||||
for (property, value) in &rule.properties {
|
||||
properties.insert(property.clone(), value.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the default successor item set - the item set that
|
||||
// we should advance to if the next element doesn't match any
|
||||
// of the next elements in the item set's selectors.
|
||||
let mut default_next_item_set = ItemSet::new();
|
||||
for item in &item_set {
|
||||
let rule = &self.rules[item.rule_id as usize];
|
||||
let selector = &rule.selectors[item.selector_id as usize];
|
||||
let next_step = selector.0.get(item.step_id as usize);
|
||||
if let Some(step) = next_step {
|
||||
if !step.is_immediate {
|
||||
default_next_item_set.insert(*item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.output.states[state_id].default_next_state_id = self.add_state(default_next_item_set);
|
||||
self.output.states[state_id].property_set_id = self.add_property_set(properties);
|
||||
self.output.states[state_id]
|
||||
.transitions
|
||||
.extend(transition_list.into_iter().map(|i| i.0));
|
||||
}
|
||||
|
||||
fn remove_duplicate_states(&mut self) {
|
||||
let mut state_replacements = BTreeMap::new();
|
||||
let mut done = false;
|
||||
while !done {
|
||||
done = true;
|
||||
for (i, state_i) in self.output.states.iter().enumerate() {
|
||||
if state_replacements.contains_key(&i) {
|
||||
continue;
|
||||
}
|
||||
for (j, state_j) in self.output.states.iter().enumerate() {
|
||||
if j == i {
|
||||
break;
|
||||
}
|
||||
if state_replacements.contains_key(&j) {
|
||||
continue;
|
||||
}
|
||||
if state_i == state_j {
|
||||
info!("replace state {} with state {}", i, j);
|
||||
state_replacements.insert(i, j);
|
||||
done = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for state in self.output.states.iter_mut() {
|
||||
for transition in state.transitions.iter_mut() {
|
||||
if let Some(replacement) = state_replacements.get(&transition.state_id) {
|
||||
transition.state_id = *replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_state_replacements = (0..self.output.states.len())
|
||||
.into_iter()
|
||||
.map(|state_id| {
|
||||
let replacement = state_replacements
|
||||
.get(&state_id)
|
||||
.cloned()
|
||||
.unwrap_or(state_id);
|
||||
let prior_removed = state_replacements
|
||||
.iter()
|
||||
.take_while(|i| *i.0 < replacement)
|
||||
.count();
|
||||
replacement - prior_removed
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for state in self.output.states.iter_mut() {
|
||||
for transition in state.transitions.iter_mut() {
|
||||
transition.state_id = final_state_replacements[transition.state_id];
|
||||
}
|
||||
}
|
||||
|
||||
let mut i = 0;
|
||||
self.output.states.retain(|_| {
|
||||
let result = !state_replacements.contains_key(&i);
|
||||
i += 1;
|
||||
result
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn selector_specificity(selector: &Selector) -> u32 {
|
||||
let mut result = selector.0.len() as u32;
|
||||
for step in &selector.0 {
|
||||
if step.child_index.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
if step.text_pattern.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 {
|
||||
let mut result = 0;
|
||||
if transition.index.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
if transition.text.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool {
|
||||
step.kind == transition.kind
|
||||
&& step.is_named == transition.named
|
||||
&& (step.child_index == transition.index || step.child_index.is_none())
|
||||
&& (step.text_pattern == transition.text || step.text_pattern.is_none())
|
||||
}
|
||||
|
||||
impl fmt::Debug for SelectorStep {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "(")?;
|
||||
if self.is_named {
|
||||
write!(f, "{}", self.kind)?;
|
||||
} else {
|
||||
write!(f, "\"{}\"", self.kind)?;
|
||||
}
|
||||
if let Some(n) = self.child_index {
|
||||
write!(f, ":nth-child({})", n)?;
|
||||
}
|
||||
if let Some(t) = &self.text_pattern {
|
||||
write!(f, "[text='{}']", t)?;
|
||||
}
|
||||
write!(f, ")")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Selector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "[")?;
|
||||
for (i, step) in self.0.iter().enumerate() {
|
||||
if step.is_immediate {
|
||||
write!(f, " > ")?;
|
||||
} else if i > 0 {
|
||||
write!(f, " ")?;
|
||||
}
|
||||
write!(f, "{:?}", step)?;
|
||||
}
|
||||
write!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_property_sheets_in_directory(repo_path: &Path) -> Result<()> {
|
||||
let src_dir_path = repo_path.join("src");
|
||||
let properties_dir_path = repo_path.join("properties");
|
||||
|
||||
if let Ok(entries) = fs::read_dir(properties_dir_path) {
|
||||
for entry in entries {
|
||||
let css_path = entry?.path();
|
||||
let css = fs::read_to_string(&css_path)?;
|
||||
let sheet = generate_property_sheet(&css_path, &css)?;
|
||||
let property_sheet_json_path = src_dir_path
|
||||
.join(css_path.file_name().unwrap())
|
||||
.with_extension("json");
|
||||
let property_sheet_json_file = File::create(&property_sheet_json_path)
|
||||
.map_err(|e| format!("Failed to create {:?}: {}", property_sheet_json_path, e))?;
|
||||
let mut writer = BufWriter::new(property_sheet_json_file);
|
||||
serde_json::to_writer_pretty(&mut writer, &sheet)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn generate_property_sheet_string(path: impl AsRef<Path>, css: &str) -> Result<String> {
|
||||
let sheet = generate_property_sheet(path, css)?;
|
||||
Ok(serde_json::to_string(&sheet)?)
|
||||
}
|
||||
|
||||
fn generate_property_sheet(path: impl AsRef<Path>, css: &str) -> Result<PropertySheetJSON> {
|
||||
let rules = parse_property_sheet(path.as_ref(), &css)?;
|
||||
Ok(Builder::new(rules).build())
|
||||
}
|
||||
|
||||
fn parse_property_sheet(path: &Path, css: &str) -> Result<Vec<Rule>> {
|
||||
let mut i = 0;
|
||||
let mut items = rsass::parse_scss_data(css.as_bytes())?;
|
||||
while i < items.len() {
|
||||
match &items[i] {
|
||||
rsass::Item::Import(arg) => {
|
||||
if let Some(s) = get_sass_string(arg) {
|
||||
let import_path = resolve_path(path, s)?;
|
||||
let imported_items = rsass::parse_scss_file(&import_path)?;
|
||||
items.splice(i..(i + 1), imported_items);
|
||||
continue;
|
||||
} else {
|
||||
return Err(Error("@import arguments must be strings".to_string()));
|
||||
}
|
||||
}
|
||||
rsass::Item::AtRule { name, args, .. } => match name.as_str() {
|
||||
"schema" => {
|
||||
if let Some(s) = get_sass_string(args) {
|
||||
// TODO - use schema
|
||||
let _schema_path = resolve_path(path, s)?;
|
||||
items.remove(i);
|
||||
continue;
|
||||
} else {
|
||||
return Err(Error("@schema arguments must be strings".to_string()));
|
||||
}
|
||||
}
|
||||
_ => return Err(Error(format!("Unsupported at-rule '{}'", name))),
|
||||
},
|
||||
_ => {}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let mut result = Vec::new();
|
||||
let selector_prefixes = vec![Vec::new()];
|
||||
parse_sass_items(items, &selector_prefixes, &mut result)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn parse_sass_items(
|
||||
items: Vec<rsass::Item>,
|
||||
selector_prefixes: &Vec<Vec<SelectorStep>>,
|
||||
result: &mut Vec<Rule>,
|
||||
) -> Result<()> {
|
||||
let mut properties = PropertySet::new();
|
||||
for item in items {
|
||||
match item {
|
||||
rsass::Item::None | rsass::Item::Comment(_) => {}
|
||||
rsass::Item::Property(name, value) => {
|
||||
let value = parse_sass_value(&value)?;
|
||||
match properties.entry(name.to_string()) {
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(value);
|
||||
}
|
||||
Entry::Occupied(mut o) => {
|
||||
let existing_value = o.get_mut();
|
||||
if let PropertyValue::Array(items) = existing_value {
|
||||
items.push(value);
|
||||
continue;
|
||||
} else {
|
||||
let v = existing_value.clone();
|
||||
*existing_value = PropertyValue::Array(vec![v, value]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
rsass::Item::Rule(selectors, items) => {
|
||||
let mut full_selectors = Vec::new();
|
||||
for prefix in selector_prefixes {
|
||||
let mut part_string = String::new();
|
||||
let mut next_step_is_immediate = false;
|
||||
for selector in &selectors.s {
|
||||
let mut prefix = prefix.clone();
|
||||
for part in &selector.0 {
|
||||
part_string.clear();
|
||||
write!(&mut part_string, "{}", part).unwrap();
|
||||
let part_string = part_string.trim();
|
||||
if !part_string.is_empty() {
|
||||
if part_string == "&" {
|
||||
continue;
|
||||
} else if part_string.starts_with(":nth-child(") {
|
||||
if let Some(last_step) = prefix.last_mut() {
|
||||
if let Ok(index) = usize::from_str_radix(
|
||||
&part_string[11..(part_string.len() - 1)],
|
||||
10,
|
||||
) {
|
||||
last_step.child_index = Some(index);
|
||||
}
|
||||
}
|
||||
} else if part_string.starts_with("[text=") {
|
||||
if let Some(last_step) = prefix.last_mut() {
|
||||
last_step.text_pattern = Some(
|
||||
part_string[7..(part_string.len() - 2)].to_string(),
|
||||
)
|
||||
}
|
||||
} else if part_string == ">" {
|
||||
next_step_is_immediate = true;
|
||||
} else if part_string.starts_with("[token=") {
|
||||
prefix.push(SelectorStep {
|
||||
kind: part_string[8..(part_string.len() - 2)].to_string(),
|
||||
is_named: false,
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_immediate: next_step_is_immediate,
|
||||
});
|
||||
next_step_is_immediate = false;
|
||||
} else {
|
||||
prefix.push(SelectorStep {
|
||||
kind: part_string.to_string(),
|
||||
is_named: true,
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_immediate: next_step_is_immediate,
|
||||
});
|
||||
next_step_is_immediate = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
full_selectors.push(prefix);
|
||||
}
|
||||
}
|
||||
parse_sass_items(items, &full_selectors, result)?;
|
||||
}
|
||||
_ => return Err(Error(format!("Unsupported syntax type {:?}", item))),
|
||||
}
|
||||
}
|
||||
|
||||
if !properties.is_empty() {
|
||||
result.push(Rule {
|
||||
selectors: selector_prefixes.iter().cloned().map(Selector).collect(),
|
||||
properties,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_sass_value(value: &Value) -> Result<PropertyValue> {
|
||||
match value {
|
||||
Value::Literal(s) => {
|
||||
if let Some(s) = s.single_raw() {
|
||||
Ok(PropertyValue::String(s.to_string()))
|
||||
} else {
|
||||
Err(Error("String interpolation is not supported".to_string()))
|
||||
}
|
||||
}
|
||||
Value::Call(name, raw_args) => {
|
||||
if let Some(name) = name.single_raw() {
|
||||
let mut args = Vec::new();
|
||||
for (_, arg) in raw_args.iter() {
|
||||
args.push(parse_sass_value(arg)?);
|
||||
}
|
||||
let mut result = PropertySet::new();
|
||||
result.insert("name".to_string(), PropertyValue::String(name.to_string()));
|
||||
result.insert("args".to_string(), PropertyValue::Array(args));
|
||||
Ok(PropertyValue::Object(result))
|
||||
} else {
|
||||
Err(Error("String interpolation is not supported".to_string()))
|
||||
}
|
||||
}
|
||||
Value::List(elements, ..) => {
|
||||
let mut result = Vec::new();
|
||||
for element in elements {
|
||||
result.push(parse_sass_value(element)?);
|
||||
}
|
||||
Ok(PropertyValue::Array(result))
|
||||
}
|
||||
Value::Color(_, Some(name)) => Ok(PropertyValue::String(name.clone())),
|
||||
Value::Numeric(n, _) => Ok(PropertyValue::String(format!("{}", n))),
|
||||
Value::True => Ok(PropertyValue::String("true".to_string())),
|
||||
Value::False => Ok(PropertyValue::String("false".to_string())),
|
||||
_ => Err(Error(format!(
|
||||
"Property values must be strings or function calls. Got {:?}",
|
||||
value
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_sass_string(value: &Value) -> Option<&str> {
|
||||
if let Value::Literal(s) = value {
|
||||
s.single_raw()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_path(base: &Path, p: &str) -> Result<PathBuf> {
|
||||
let path = Path::new(p);
|
||||
let mut result = base.to_owned();
|
||||
result.pop();
|
||||
if path.starts_with(".") {
|
||||
result.push(path);
|
||||
if result.exists() {
|
||||
return Ok(result);
|
||||
}
|
||||
} else {
|
||||
loop {
|
||||
result.push("node_modules");
|
||||
result.push(path);
|
||||
if result.exists() {
|
||||
return Ok(result);
|
||||
}
|
||||
result.pop();
|
||||
result.pop();
|
||||
if !result.pop() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(Error(format!("Could not resolve import path `{}`", p)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use regex::Regex;
|
||||
|
||||
#[test]
|
||||
fn test_immediate_child_and_descendant_selectors() {
|
||||
let sheet = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
f1 {
|
||||
color: red;
|
||||
|
||||
& > f2 {
|
||||
color: green;
|
||||
}
|
||||
|
||||
& f3 {
|
||||
color: blue;
|
||||
}
|
||||
}
|
||||
|
||||
f2 {
|
||||
color: indigo;
|
||||
height: 2;
|
||||
}
|
||||
|
||||
f3 {
|
||||
color: violet;
|
||||
height: 3;
|
||||
}
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// f1 single-element selector
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1"]),
|
||||
props(&[("color", "red")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f2", "f1"]),
|
||||
props(&[("color", "red")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f2", "f3", "f1"]),
|
||||
props(&[("color", "red")])
|
||||
);
|
||||
|
||||
// f2 single-element selector
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f2"]),
|
||||
props(&[("color", "indigo"), ("height", "2")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f2", "f2"]),
|
||||
props(&[("color", "indigo"), ("height", "2")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1", "f3", "f2"]),
|
||||
props(&[("color", "indigo"), ("height", "2")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1", "f6", "f2"]),
|
||||
props(&[("color", "indigo"), ("height", "2")])
|
||||
);
|
||||
|
||||
// f3 single-element selector
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f3"]),
|
||||
props(&[("color", "violet"), ("height", "3")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f2", "f3"]),
|
||||
props(&[("color", "violet"), ("height", "3")])
|
||||
);
|
||||
|
||||
// f2 child selector
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1", "f2"]),
|
||||
props(&[("color", "green"), ("height", "2")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f2", "f1", "f2"]),
|
||||
props(&[("color", "green"), ("height", "2")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f3", "f1", "f2"]),
|
||||
props(&[("color", "green"), ("height", "2")])
|
||||
);
|
||||
|
||||
// f3 descendant selector
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1", "f3"]),
|
||||
props(&[("color", "blue"), ("height", "3")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1", "f2", "f3"]),
|
||||
props(&[("color", "blue"), ("height", "3")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query_simple(&sheet, vec!["f1", "f6", "f7", "f8", "f3"]),
|
||||
props(&[("color", "blue"), ("height", "3")])
|
||||
);
|
||||
|
||||
// no match
|
||||
assert_eq!(*query_simple(&sheet, vec!["f1", "f3", "f4"]), props(&[]));
|
||||
assert_eq!(*query_simple(&sheet, vec!["f1", "f2", "f5"]), props(&[]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_text_attribute() {
|
||||
let sheet = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
f1 {
|
||||
color: red;
|
||||
|
||||
&[text='^[A-Z]'] {
|
||||
color: green;
|
||||
}
|
||||
|
||||
&[text='^[A-Z_]+$'] {
|
||||
color: blue;
|
||||
}
|
||||
}
|
||||
|
||||
f2[text='^[A-Z_]+$'] {
|
||||
color: purple;
|
||||
}
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0)], "abc"),
|
||||
props(&[("color", "red")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0)], "Abc"),
|
||||
props(&[("color", "green")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0)], "AB_CD"),
|
||||
props(&[("color", "blue")])
|
||||
);
|
||||
assert_eq!(*query(&sheet, vec![("f2", true, 0)], "Abc"), props(&[]));
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f2", true, 0)], "ABC"),
|
||||
props(&[("color", "purple")])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cascade_ordering_as_tie_breaker() {
|
||||
let sheet = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
f1 f2:nth-child(1) { color: red; }
|
||||
f1:nth-child(1) f2 { color: green; }
|
||||
f1 f2[text='a'] { color: blue; }
|
||||
f1 f2[text='b'] { color: violet; }
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0), ("f2", true, 0)], "x"),
|
||||
props(&[])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0), ("f2", true, 1)], "x"),
|
||||
props(&[("color", "red")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "x"),
|
||||
props(&[("color", "green")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "a"),
|
||||
props(&[("color", "blue")])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "ab"),
|
||||
props(&[("color", "violet")])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_css_function_calls() {
|
||||
let sheet = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
a {
|
||||
b: f();
|
||||
c: f(g(h), i, \"j\", 10);
|
||||
}
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let p = query_simple(&sheet, vec!["a"]);
|
||||
|
||||
assert_eq!(
|
||||
p["b"],
|
||||
object(&[("name", string("f")), ("args", array(vec![])),])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
p["c"],
|
||||
object(&[
|
||||
("name", string("f")),
|
||||
(
|
||||
"args",
|
||||
array(vec![
|
||||
object(&[("name", string("g")), ("args", array(vec![string("h"),]))]),
|
||||
string("i"),
|
||||
string("j"),
|
||||
string("10"),
|
||||
])
|
||||
),
|
||||
])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_array_by_declaring_property_multiple_times() {
|
||||
let sheet = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
a {
|
||||
b: 'foo';
|
||||
b: 'bar';
|
||||
b: 'baz';
|
||||
c: f(g());
|
||||
c: h();
|
||||
}
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let p = query_simple(&sheet, vec!["a"]);
|
||||
|
||||
assert_eq!(
|
||||
p["b"],
|
||||
array(vec![string("foo"), string("bar"), string("baz"),])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
p["c"],
|
||||
array(vec![
|
||||
object(&[
|
||||
("name", string("f")),
|
||||
(
|
||||
"args",
|
||||
array(vec![object(&[
|
||||
("name", string("g")),
|
||||
("args", array(vec![])),
|
||||
])])
|
||||
)
|
||||
]),
|
||||
object(&[("name", string("h")), ("args", array(vec![])),]),
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
/// Convenience wrapper around `query` for node stacks where every node is
/// named with child index 0, and the leaf text is empty.
fn query_simple<'a>(
    sheet: &'a PropertySheetJSON,
    node_stack: Vec<&'static str>,
) -> &'a PropertySet {
    query(
        sheet,
        node_stack.into_iter().map(|s| (s, true, 0)).collect(),
        "",
    )
}
|
||||
|
||||
/// Walks the property sheet's state machine along `node_stack`
/// (`(kind, is_named, child_index)` triples, outermost node first) and
/// returns the property set of the final state. `leaf_text` is matched
/// against any text-conditioned transitions.
fn query<'a>(
    sheet: &'a PropertySheetJSON,
    node_stack: Vec<(&'static str, bool, usize)>,
    leaf_text: &str,
) -> &'a PropertySet {
    // State 0 is the start state.
    let mut state_id = 0;
    for (kind, is_named, child_index) in node_stack {
        let state = &sheet.states[state_id];
        state_id = state
            .transitions
            .iter()
            .find(|transition| {
                transition.kind == kind
                    && transition.named == is_named
                    // A transition without an index constraint matches any index.
                    && transition.index.map_or(true, |index| index == child_index)
                    // Likewise, no text pattern means the text always matches.
                    && (transition
                        .text
                        .as_ref()
                        .map_or(true, |text| Regex::new(text).unwrap().is_match(leaf_text)))
            })
            // Fall back to the state's default successor when nothing matches.
            .map_or(state.default_next_state_id, |t| t.state_id);
    }
    &sheet.property_sets[sheet.states[state_id].property_set_id]
}
|
||||
|
||||
/// Test shorthand for building a `PropertyValue::Array`.
fn array(s: Vec<PropertyValue>) -> PropertyValue {
    PropertyValue::Array(s)
}
|
||||
|
||||
fn object<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertyValue {
|
||||
PropertyValue::Object(
|
||||
s.into_iter()
|
||||
.map(|(a, b)| (a.to_string(), b.clone()))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Test shorthand for building a `PropertyValue::String`.
fn string(s: &str) -> PropertyValue {
    PropertyValue::String(s.to_string())
}
|
||||
|
||||
fn props<'a>(s: &'a [(&'a str, &'a str)]) -> PropertySet {
|
||||
s.into_iter()
|
||||
.map(|(a, b)| (a.to_string(), PropertyValue::String(b.to_string())))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
280
cli/src/test.rs
Normal file
280
cli/src/test.rs
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
use super::error::{Error, Result};
|
||||
use super::util;
|
||||
use ansi_term::Colour;
|
||||
use difference::{Changeset, Difference};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
|
||||
use regex::Regex;
|
||||
use std::char;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
use tree_sitter::{Language, LogType, Parser};
|
||||
|
||||
lazy_static! {
    // Matches a test header: a line of '='s, the test name, another line of '='s.
    static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
        .multi_line(true)
        .build()
        .unwrap();
    // Matches the divider between a test's input and its expected output.
    static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"\r?\n---+\r?\n")
        .multi_line(true)
        .build()
        .unwrap();
    // Used to collapse whitespace runs when normalizing expected output.
    static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
}
|
||||
|
||||
/// A parsed corpus test file: either a named group of entries (one per
/// file or directory) or a single example with its input source and
/// expected output.
#[derive(Debug, PartialEq, Eq)]
pub enum TestEntry {
    Group {
        name: String,
        children: Vec<TestEntry>,
    },
    Example {
        name: String,
        // Raw source bytes to parse.
        input: Vec<u8>,
        // Expected parse tree rendered as a normalized S-expression.
        output: String,
    },
}
|
||||
|
||||
impl Default for TestEntry {
|
||||
fn default() -> Self {
|
||||
TestEntry::Group {
|
||||
name: String::new(),
|
||||
children: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses every test example found at `path` with `language` and compares
/// the resulting S-expressions against each example's expected output.
///
/// `debug` logs lexer/parser messages to stderr; `debug_graph` writes parse
/// graphs to log.html instead. `filter` restricts which examples run, by
/// substring match on their names.
///
/// Prints each failure's diff and returns an `Err` with an empty message
/// (details were already printed) if any example failed.
pub fn run_tests_at_path(
    language: Language,
    path: &Path,
    debug: bool,
    debug_graph: bool,
    filter: Option<&str>,
) -> Result<()> {
    let test_entry = parse_tests(path)?;
    // Kept alive for the duration of the run so graph logging stays open.
    let mut _log_session = None;
    let mut parser = Parser::new();
    parser.set_language(language)?;

    if debug_graph {
        _log_session = Some(util::log_graphs(&mut parser, "log.html")?);
    } else if debug {
        parser.set_logger(Some(Box::new(|log_type, message| {
            // Indent lexer messages to distinguish them from parser messages.
            if log_type == LogType::Lex {
                io::stderr().write(b" ").unwrap();
            }
            write!(&mut io::stderr(), "{}\n", message).unwrap();
        })));
    }

    // Collect (name, actual, expected) for every failing example.
    let mut failures = Vec::new();
    if let TestEntry::Group { children, .. } = test_entry {
        for child in children {
            run_tests(&mut parser, child, filter, 0, &mut failures)?;
        }
    }

    if failures.len() > 0 {
        println!("");

        if failures.len() == 1 {
            println!("1 failure:")
        } else {
            println!("{} failures:", failures.len())
        }

        print_diff_key();
        for (i, (name, actual, expected)) in failures.iter().enumerate() {
            println!("\n {}. {}:", i + 1, name);
            print_diff(actual, expected);
        }
        Err(Error(String::new()))
    } else {
        Ok(())
    }
}
|
||||
|
||||
/// Prints the legend explaining the colors used by `print_diff`.
pub fn print_diff_key() {
    println!(
        "\n{} / {}",
        Colour::Green.paint("expected"),
        Colour::Red.paint("actual")
    );
}
|
||||
|
||||
/// Prints a word-level diff between `actual` and `expected`: additions
/// (present in `expected`) in green, removals (only in `actual`) in red.
pub fn print_diff(actual: &String, expected: &String) {
    // Diff at word granularity (split on spaces).
    let changeset = Changeset::new(actual, expected, " ");
    print!(" ");
    for diff in &changeset.diffs {
        match diff {
            Difference::Same(part) => {
                print!("{}{}", part, changeset.split);
            }
            Difference::Add(part) => {
                print!("{}{}", Colour::Green.paint(part), changeset.split);
            }
            Difference::Rem(part) => {
                print!("{}{}", Colour::Red.paint(part), changeset.split);
            }
        }
    }
    println!("");
}
|
||||
|
||||
/// Runs one test entry — recursing into groups — printing a pass/fail line
/// per example and pushing `(name, actual, expected)` for each failure.
fn run_tests(
    parser: &mut Parser,
    test_entry: TestEntry,
    filter: Option<&str>,
    mut indent_level: i32,
    failures: &mut Vec<(String, String, String)>,
) -> Result<()> {
    match test_entry {
        TestEntry::Example {
            name,
            input,
            output,
        } => {
            // Skip examples whose name doesn't contain the filter substring.
            if let Some(filter) = filter {
                if !name.contains(filter) {
                    return Ok(());
                }
            }
            let tree = parser
                .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None)
                .unwrap();
            let actual = tree.root_node().to_sexp();
            for _ in 0..indent_level {
                print!(" ");
            }
            if actual == output {
                println!("✓ {}", Colour::Green.paint(&name));
            } else {
                println!("✗ {}", Colour::Red.paint(&name));
                failures.push((name, actual, output));
            }
        }
        TestEntry::Group { name, children } => {
            for _ in 0..indent_level {
                print!(" ");
            }
            println!("{}:", name);
            // Children are printed one level deeper than their group header.
            indent_level += 1;
            for child in children {
                run_tests(parser, child, filter, indent_level, failures)?;
            }
        }
    }
    Ok(())
}
|
||||
|
||||
/// Loads tests from `path`. A directory becomes a `Group` whose children
/// are the parsed contents of each directory entry; a regular file is
/// parsed as corpus test content. The entry's name is the path's file stem.
pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
    let name = path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("")
        .to_string();
    if path.is_dir() {
        let mut children = Vec::new();
        for entry in fs::read_dir(path)? {
            let entry = entry?;
            children.push(parse_tests(&entry.path())?);
        }
        Ok(TestEntry::Group { name, children })
    } else {
        let content = fs::read_to_string(path)?;
        Ok(parse_test_content(name, content))
    }
}
|
||||
|
||||
/// Splits a corpus file's text into `TestEntry::Example`s.
///
/// The format is a sequence of sections, each introduced by a header
/// (`===` lines around a name) and split by a `---` divider into input
/// source and expected S-expression output. The expected output is
/// whitespace-normalized so corpus files can format it freely.
fn parse_test_content(name: String, content: String) -> TestEntry {
    let mut children = Vec::new();
    let bytes = content.as_bytes();
    let mut previous_name = String::new();
    let mut previous_header_end = 0;
    // Iterate over header positions, plus a sentinel at EOF so the final
    // section is flushed.
    for header_match in HEADER_REGEX
        .find_iter(&bytes)
        .map(|m| (m.start(), m.end()))
        .chain(Some((bytes.len(), bytes.len())))
    {
        let (header_start, header_end) = header_match;
        // Everything between the previous header and this one is a test body.
        if previous_header_end > 0 {
            if let Some(divider_match) =
                DIVIDER_REGEX.find(&bytes[previous_header_end..header_start])
            {
                // Translate divider offsets back into whole-file coordinates.
                let (divider_start, divider_end) = (
                    previous_header_end + divider_match.start(),
                    previous_header_end + divider_match.end(),
                );
                if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) {
                    let input = bytes[previous_header_end..divider_start].to_vec();
                    // Collapse whitespace and tighten closing parens so the
                    // comparison is insensitive to corpus formatting.
                    let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string();
                    let output = output.replace(" )", ")");
                    children.push(TestEntry::Example {
                        name: previous_name,
                        input,
                        output,
                    });
                }
            }
        }
        // The test name is the header text with '='s and whitespace trimmed.
        previous_name = String::from_utf8_lossy(&bytes[header_start..header_end])
            .trim_matches(|c| char::is_whitespace(c) || c == '=')
            .to_string();
        previous_header_end = header_end;
    }
    TestEntry::Group { name, children }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_test_content() {
|
||||
let entry = parse_test_content(
|
||||
"the-filename".to_string(),
|
||||
r#"
|
||||
===============
|
||||
The first test
|
||||
===============
|
||||
|
||||
a b c
|
||||
|
||||
---
|
||||
|
||||
(a
|
||||
(b c))
|
||||
|
||||
================
|
||||
The second test
|
||||
================
|
||||
d
|
||||
---
|
||||
(d)
|
||||
"#
|
||||
.trim()
|
||||
.to_string(),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
entry,
|
||||
TestEntry::Group {
|
||||
name: "the-filename".to_string(),
|
||||
children: vec![
|
||||
TestEntry::Example {
|
||||
name: "The first test".to_string(),
|
||||
input: "\na b c\n".as_bytes().to_vec(),
|
||||
output: "(a (b c))".to_string(),
|
||||
},
|
||||
TestEntry::Example {
|
||||
name: "The second test".to_string(),
|
||||
input: "d".as_bytes().to_vec(),
|
||||
output: "(d)".to_string(),
|
||||
},
|
||||
]
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
390
cli/src/tests/corpus_test.rs
Normal file
390
cli/src/tests/corpus_test.rs
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
use super::helpers::allocations;
|
||||
use super::helpers::edits::{get_random_edit, invert_edit, perform_edit};
|
||||
use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language};
|
||||
use super::helpers::random::Rand;
|
||||
use super::helpers::scope_sequence::ScopeSequence;
|
||||
use crate::generate;
|
||||
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
|
||||
use crate::util;
|
||||
use lazy_static::lazy_static;
|
||||
use std::{env, fs, time, usize};
|
||||
use tree_sitter::{LogType, Node, Parser, Tree};
|
||||
|
||||
// Number of random edits applied per trial, and trials per corpus example.
const EDIT_COUNT: usize = 3;
const TRIAL_COUNT: usize = 10;
// Fixture grammars exercised by the real-language corpus tests.
const LANGUAGES: &'static [&'static str] = &[
    "bash",
    "c",
    "cpp",
    "embedded-template",
    "go",
    "html",
    "javascript",
    "python",
];

lazy_static! {
    // Environment-variable switches controlling logging and filtering of
    // which languages / examples / trials run.
    static ref LOG_ENABLED: bool = env::var("TREE_SITTER_TEST_ENABLE_LOG").is_ok();
    static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_TEST_ENABLE_LOG_GRAPHS").is_ok();
    static ref LANGUAGE_FILTER: Option<String> = env::var("TREE_SITTER_TEST_LANGUAGE_FILTER").ok();
    static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok();
    static ref TRIAL_FILTER: Option<usize> = env::var("TREE_SITTER_TEST_TRIAL_FILTER")
        .map(|s| usize::from_str_radix(&s, 10).unwrap())
        .ok();
    // Random seed for edit generation; settable so failures are reproducible.
    // Defaults to the current UNIX time, so each unseeded run differs.
    pub static ref SEED: usize = env::var("TREE_SITTER_TEST_SEED")
        .map(|s| usize::from_str_radix(&s, 10).unwrap())
        .unwrap_or(
            time::SystemTime::now()
                .duration_since(time::UNIX_EPOCH)
                .unwrap()
                .as_secs() as usize,
        );
}
|
||||
|
||||
#[test]
|
||||
fn test_real_language_corpus_files() {
|
||||
eprintln!("\n\nRandom seed: {}\n", *SEED);
|
||||
let grammars_dir = fixtures_dir().join("grammars");
|
||||
let error_corpus_dir = fixtures_dir().join("error_corpus");
|
||||
|
||||
let mut failure_count = 0;
|
||||
for language_name in LANGUAGES.iter().cloned() {
|
||||
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
|
||||
if language_name != filter.as_str() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let language = get_language(language_name);
|
||||
let corpus_dir = grammars_dir.join(language_name).join("corpus");
|
||||
let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
|
||||
let main_tests = parse_tests(&corpus_dir).unwrap();
|
||||
let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
|
||||
let mut tests = flatten_tests(main_tests);
|
||||
tests.extend(flatten_tests(error_tests));
|
||||
|
||||
if !tests.is_empty() {
|
||||
eprintln!("language: {:?}", language_name);
|
||||
}
|
||||
|
||||
for (example_name, input, expected_output) in tests {
|
||||
eprintln!(" example: {:?}", example_name);
|
||||
|
||||
if TRIAL_FILTER.map_or(true, |t| t == 0) {
|
||||
allocations::start_recording();
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse_utf8(&mut |i, _| &input[i..], None).unwrap();
|
||||
let actual_output = tree.root_node().to_sexp();
|
||||
drop(tree);
|
||||
drop(parser);
|
||||
if actual_output != expected_output {
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &expected_output);
|
||||
println!("");
|
||||
failure_count += 1;
|
||||
continue;
|
||||
}
|
||||
allocations::stop_recording();
|
||||
}
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser
|
||||
.parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), None)
|
||||
.unwrap();
|
||||
drop(parser);
|
||||
|
||||
for trial in 1..=TRIAL_COUNT {
|
||||
if TRIAL_FILTER.map_or(true, |filter| filter == trial) {
|
||||
let mut rand = Rand::new(*SEED + trial);
|
||||
|
||||
allocations::start_recording();
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let mut tree = tree.clone();
|
||||
let mut input = input.clone();
|
||||
|
||||
if *LOG_GRAPH_ENABLED {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
// Perform a random series of edits and reparse.
|
||||
let mut undo_stack = Vec::new();
|
||||
for _ in 0..EDIT_COUNT {
|
||||
let edit = get_random_edit(&mut rand, &input);
|
||||
undo_stack.push(invert_edit(&input, &edit));
|
||||
perform_edit(&mut tree, &mut input, &edit);
|
||||
}
|
||||
if *LOG_GRAPH_ENABLED {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
let mut tree2 = parser
|
||||
.parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree))
|
||||
.unwrap();
|
||||
|
||||
// Check that the new tree is consistent.
|
||||
check_consistent_sizes(&tree2, &input);
|
||||
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
|
||||
println!(
|
||||
"\nUnexpected scope change in trial {}\n{}\n\n",
|
||||
trial, message
|
||||
);
|
||||
failure_count += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Undo all of the edits and re-parse again.
|
||||
while let Some(edit) = undo_stack.pop() {
|
||||
perform_edit(&mut tree2, &mut input, &edit);
|
||||
}
|
||||
if *LOG_GRAPH_ENABLED {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
let tree3 = parser
|
||||
.parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree2))
|
||||
.unwrap();
|
||||
|
||||
// Verify that the final tree matches the expectation from the corpus.
|
||||
let actual_output = tree3.root_node().to_sexp();
|
||||
if actual_output != expected_output {
|
||||
println!(
|
||||
"Incorrect parse for {} - {} - trial {}",
|
||||
language_name, example_name, trial
|
||||
);
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &expected_output);
|
||||
println!("");
|
||||
failure_count += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check that the edited tree is consistent.
|
||||
check_consistent_sizes(&tree3, &input);
|
||||
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
|
||||
eprintln!(
|
||||
"Unexpected scope change in trial {}\n{}\n\n",
|
||||
trial, message
|
||||
);
|
||||
failure_count += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
drop(tree);
|
||||
drop(tree2);
|
||||
drop(tree3);
|
||||
drop(parser);
|
||||
allocations::stop_recording();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if failure_count > 0 {
|
||||
panic!("{} corpus tests failed", failure_count);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_feature_corpus_files() {
|
||||
let test_grammars_dir = fixtures_dir().join("test_grammars");
|
||||
|
||||
let mut failure_count = 0;
|
||||
for entry in fs::read_dir(&test_grammars_dir).unwrap() {
|
||||
let entry = entry.unwrap();
|
||||
if !entry.metadata().unwrap().is_dir() {
|
||||
continue;
|
||||
}
|
||||
let language_name = entry.file_name();
|
||||
let language_name = language_name.to_str().unwrap();
|
||||
|
||||
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
|
||||
if language_name != filter.as_str() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let test_path = entry.path();
|
||||
let grammar_path = test_path.join("grammar.json");
|
||||
let error_message_path = test_path.join("expected_error.txt");
|
||||
let grammar_json = fs::read_to_string(grammar_path).unwrap();
|
||||
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
|
||||
|
||||
if error_message_path.exists() {
|
||||
if EXAMPLE_FILTER.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
eprintln!("test language: {:?}", language_name);
|
||||
|
||||
let expected_message = fs::read_to_string(&error_message_path).unwrap();
|
||||
if let Err(e) = generate_result {
|
||||
if e.0 != expected_message {
|
||||
eprintln!(
|
||||
"Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n",
|
||||
expected_message, e.0
|
||||
);
|
||||
failure_count += 1;
|
||||
}
|
||||
} else {
|
||||
eprintln!(
|
||||
"Expected error message but got none for test grammar '{}'",
|
||||
language_name
|
||||
);
|
||||
failure_count += 1;
|
||||
}
|
||||
} else {
|
||||
let corpus_path = test_path.join("corpus.txt");
|
||||
let c_code = generate_result.unwrap().1;
|
||||
let language = get_test_language(language_name, &c_code, Some(&test_path));
|
||||
let test = parse_tests(&corpus_path).unwrap();
|
||||
let tests = flatten_tests(test);
|
||||
|
||||
if !tests.is_empty() {
|
||||
eprintln!("test language: {:?}", language_name);
|
||||
}
|
||||
|
||||
for (name, input, expected_output) in tests {
|
||||
eprintln!(" example: {:?}", name);
|
||||
|
||||
allocations::start_recording();
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse_utf8(&mut |i, _| &input[i..], None).unwrap();
|
||||
let actual_output = tree.root_node().to_sexp();
|
||||
drop(tree);
|
||||
drop(parser);
|
||||
if actual_output != expected_output {
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &expected_output);
|
||||
println!("");
|
||||
failure_count += 1;
|
||||
continue;
|
||||
}
|
||||
allocations::stop_recording();
|
||||
}
|
||||
}
|
||||
}
|
||||
if failure_count > 0 {
|
||||
panic!("{} corpus tests failed", failure_count);
|
||||
}
|
||||
}
|
||||
|
||||
/// Asserts structural invariants of a parse tree against its source bytes:
/// byte offsets agree with (row, column) positions, children are ordered and
/// contained within their parent, named-child counts match, and the
/// has-changes flag propagates from children to parents.
fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
    fn check(node: Node, line_offsets: &Vec<usize>) {
        let start_byte = node.start_byte();
        let end_byte = node.end_byte();
        let start_point = node.start_position();
        let end_point = node.end_position();

        // A node's span may be empty but never inverted.
        assert!(start_byte <= end_byte);
        assert!(start_point <= end_point);
        // Byte offsets and points must describe the same source locations.
        assert_eq!(
            start_byte,
            line_offsets[start_point.row] + start_point.column
        );
        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);

        let mut last_child_end_byte = start_byte;
        let mut last_child_end_point = start_point;
        let mut some_child_has_changes = false;
        let mut actual_named_child_count = 0;
        for child in node.children() {
            // Children must appear in order and not overlap.
            assert!(child.start_byte() >= last_child_end_byte);
            assert!(child.start_position() >= last_child_end_point);
            check(child, line_offsets);
            if child.has_changes() {
                some_child_has_changes = true;
            }
            if child.is_named() {
                actual_named_child_count += 1;
            }
            last_child_end_byte = child.end_byte();
            last_child_end_point = child.end_position();
        }

        assert_eq!(actual_named_child_count, node.named_child_count());

        // The last child must not extend past its parent.
        if node.child_count() > 0 {
            assert!(end_byte >= last_child_end_byte);
            assert!(end_point >= last_child_end_point);
        }

        // If any child changed, the parent must be flagged as changed too.
        if some_child_has_changes {
            assert!(node.has_changes());
        }
    }

    // line_offsets[row] = byte offset of the first byte of that row.
    let mut line_offsets = vec![0];
    for (i, c) in input.iter().enumerate() {
        if *c == '\n' as u8 {
            line_offsets.push(i + 1);
        }
    }

    check(tree.root_node(), &line_offsets);
}
|
||||
|
||||
/// Checks that the scope-sequence differences between the two trees are
/// covered by the ranges reported by `Tree::changed_ranges`; delegates to
/// `ScopeSequence::check_changes`, returning its error description on failure.
fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> {
    let changed_ranges = old_tree.changed_ranges(new_tree);
    let old_scope_sequence = ScopeSequence::new(old_tree);
    let new_scope_sequence = ScopeSequence::new(new_tree);
    old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
}
|
||||
|
||||
/// Builds a `Parser`, wiring up stderr logging or graph logging depending on
/// the TREE_SITTER_TEST_ENABLE_LOG* environment switches. A graph-log
/// session, if opened, is stored in `session` so it outlives this call.
fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

    if *LOG_ENABLED {
        parser.set_logger(Some(Box::new(|log_type, msg| {
            // Indent lexer messages to distinguish them from parser messages.
            if log_type == LogType::Lex {
                eprintln!(" {}", msg);
            } else {
                eprintln!("{}", msg);
            }
        })));
    } else if *LOG_GRAPH_ENABLED {
        *session = Some(util::log_graphs(&mut parser, log_filename).unwrap());
    }

    parser
}
|
||||
|
||||
/// Flattens a tree of test entries into `(full_name, input, output)` tuples,
/// joining nested group names with " - " and applying `EXAMPLE_FILTER`.
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
    // Recursive worker: `prefix` is the " - "-joined path of enclosing groups.
    fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String)>) {
        match test {
            TestEntry::Example {
                mut name,
                input,
                output,
            } => {
                // Prepend the group path to the example's name.
                if !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                // Filter on the fully-qualified name.
                if let Some(filter) = EXAMPLE_FILTER.as_ref() {
                    if !name.contains(filter.as_str()) {
                        return;
                    }
                }
                result.push((name, input, output));
            }
            TestEntry::Group { mut name, children } => {
                if !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                for child in children {
                    helper(child, &name, result);
                }
            }
        }
    }
    let mut result = Vec::new();
    helper(test, "", &mut result);
    result
}
|
||||
104
cli/src/tests/helpers/allocations.rs
Normal file
104
cli/src/tests/helpers/allocations.rs
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
#![cfg(test)]
|
||||
#![allow(dead_code)]
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use spin::Mutex;
|
||||
use std::collections::HashMap;
|
||||
use std::os::raw::{c_ulong, c_void};
|
||||
|
||||
// Wrapper so raw allocation pointers can be used as hash-map keys across
// threads. The pointers are only compared, never dereferenced.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}

// Tracks live allocations while recording is enabled. Each allocation is
// tagged with a monotonically increasing index so leaks can be identified
// by their allocation order.
#[derive(Default)]
struct AllocationRecorder {
    enabled: bool,
    allocation_count: u64,
    outstanding_allocations: HashMap<Allocation, u64>,
}

lazy_static! {
    // Global recorder guarded by a `spin::Mutex` (presumably chosen so the
    // allocation hooks never re-enter the allocator via a blocking lock —
    // confirm).
    static ref RECORDER: Mutex<AllocationRecorder> = Mutex::new(AllocationRecorder::default());
}

// The real C allocator functions, called by the ts_record_* hooks below.
extern "C" {
    fn malloc(size: c_ulong) -> *mut c_void;
    fn calloc(count: c_ulong, size: c_ulong) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: c_ulong) -> *mut c_void;
    fn free(ptr: *mut c_void);
}
|
||||
|
||||
/// Begins tracking allocations: resets the counter and the outstanding set,
/// then enables recording in the ts_record_* hooks.
pub fn start_recording() {
    let mut recorder = RECORDER.lock();
    recorder.enabled = true;
    recorder.allocation_count = 0;
    recorder.outstanding_allocations.clear();
}
|
||||
|
||||
/// Stops tracking and panics if any recorded allocation was never freed,
/// reporting the sorted allocation indices to aid reproduction.
pub fn stop_recording() {
    let mut recorder = RECORDER.lock();
    recorder.enabled = false;

    if !recorder.outstanding_allocations.is_empty() {
        let mut allocation_indices = recorder
            .outstanding_allocations
            .iter()
            .map(|e| e.1)
            .collect::<Vec<_>>();
        allocation_indices.sort_unstable();
        panic!("Leaked allocation indices: {:?}", allocation_indices);
    }
}
|
||||
|
||||
/// Registers a freshly-allocated pointer (if recording is enabled), tagging
/// it with the next allocation index.
fn record_alloc(ptr: *mut c_void) {
    let mut recorder = RECORDER.lock();
    if recorder.enabled {
        let count = recorder.allocation_count;
        recorder.allocation_count += 1;
        recorder
            .outstanding_allocations
            .insert(Allocation(ptr), count);
    }
}
|
||||
|
||||
/// Unregisters a pointer that is being freed (if recording is enabled).
fn record_dealloc(ptr: *mut c_void) {
    let mut recorder = RECORDER.lock();
    if recorder.enabled {
        recorder.outstanding_allocations.remove(&Allocation(ptr));
    }
}
|
||||
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_record_malloc(size: c_ulong) -> *const c_void {
|
||||
let result = unsafe { malloc(size) };
|
||||
record_alloc(result);
|
||||
result
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_record_calloc(count: c_ulong, size: c_ulong) -> *const c_void {
|
||||
let result = unsafe { calloc(count, size) };
|
||||
record_alloc(result);
|
||||
result
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_record_realloc(ptr: *mut c_void, size: c_ulong) -> *const c_void {
|
||||
record_dealloc(ptr);
|
||||
let result = unsafe { realloc(ptr, size) };
|
||||
record_alloc(result);
|
||||
result
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_record_free(ptr: *mut c_void) {
|
||||
record_dealloc(ptr);
|
||||
unsafe { free(ptr) };
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
extern "C" fn ts_record_allocations_toggle() {
|
||||
let mut recorder = RECORDER.lock();
|
||||
recorder.enabled = !recorder.enabled;
|
||||
}
|
||||
11
cli/src/tests/helpers/dirs.rs
Normal file
11
cli/src/tests/helpers/dirs.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
// NOTE: this file is pulled into fixtures.rs via `include!("./dirs.rs")`, so
// `lazy_static`, `PathBuf`, and `fs` come from the including module's imports.
lazy_static! {
    // Repository root: the parent of the `cli` crate directory.
    static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
    // Shared test fixtures checked into the repository.
    static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
    // C headers for the tree-sitter runtime library.
    static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
    // Grammar definitions used to compile fixture languages.
    static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
    // Scratch directory for generated artifacts; created on first access.
    static ref SCRATCH_DIR: PathBuf = {
        let result = ROOT_DIR.join("target").join("scratch");
        fs::create_dir_all(&result).unwrap();
        result
    };
}
|
||||
145
cli/src/tests/helpers/edits.rs
Normal file
145
cli/src/tests/helpers/edits.rs
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
use super::random::Rand;
|
||||
use std::ops::Range;
|
||||
use std::str;
|
||||
use tree_sitter::{InputEdit, Point, Tree};
|
||||
|
||||
/// A textual edit: at byte `position`, delete `deleted_length` bytes and
/// insert `inserted_text` in their place.
pub struct Edit {
    pub position: usize,
    pub deleted_length: usize,
    pub inserted_text: Vec<u8>,
}
|
||||
|
||||
/// Records which byte offsets of a text are read during a parse, so tests
/// can verify that an incremental reparse only re-reads text near the edits.
#[derive(Debug)]
pub struct ReadRecorder<'a> {
    content: &'a Vec<u8>,
    // Sorted, deduplicated list of the offsets that have been read.
    indices_read: Vec<usize>,
}

impl<'a> ReadRecorder<'a> {
    pub fn new(content: &'a Vec<u8>) -> Self {
        Self {
            content,
            indices_read: Vec::new(),
        }
    }

    /// Reads one byte at `offset`, recording the offset. Returns an empty
    /// slice at or past the end of the content.
    pub fn read(&mut self, offset: usize) -> &'a [u8] {
        if offset < self.content.len() {
            // A failed binary search yields the insertion point, keeping
            // `indices_read` sorted and free of duplicates.
            if let Err(i) = self.indices_read.binary_search(&offset) {
                self.indices_read.insert(i, offset);
            }
            &self.content[offset..(offset + 1)]
        } else {
            &[]
        }
    }

    /// Returns the contiguous runs of read bytes, each as a UTF-8 string.
    ///
    /// NOTE(review): when a gap between runs is encountered, the current index
    /// is discarded (`last_range` is reset to `None` without starting a new
    /// range at `*index`), so the first byte read after each gap is omitted
    /// from the following string. Existing expectations in parser_test appear
    /// to encode this behavior — confirm before changing it.
    pub fn strings_read(&self) -> Vec<&'a str> {
        let mut result = Vec::new();
        let mut last_range: Option<Range<usize>> = None;
        for index in self.indices_read.iter() {
            if let Some(ref mut range) = &mut last_range {
                if range.end == *index {
                    range.end += 1;
                } else {
                    result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
                    last_range = None;
                }
            } else {
                last_range = Some(*index..(*index + 1));
            }
        }
        // Flush the final run, if any.
        if let Some(range) = last_range {
            result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
        }
        result
    }
}
|
||||
|
||||
/// Applies `edit` to both the source text and the syntax tree, returning the
/// `InputEdit` (byte offsets plus row/column positions) that was applied to
/// the tree via `tree.edit`.
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
    let start_byte = edit.position;
    let old_end_byte = edit.position + edit.deleted_length;
    let new_end_byte = edit.position + edit.inserted_text.len();
    // Positions for the old text must be computed *before* the splice...
    let start_position = position_for_offset(input, start_byte);
    let old_end_position = position_for_offset(input, old_end_byte);
    input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
    // ...and the new end position *after*, since it refers to the edited text.
    let new_end_position = position_for_offset(input, new_end_byte);
    let edit = InputEdit {
        start_byte,
        old_end_byte,
        new_end_byte,
        start_position,
        old_end_position,
        new_end_position,
    };
    tree.edit(&edit);
    edit
}
|
||||
|
||||
pub fn invert_edit(input: &Vec<u8>, edit: &Edit) -> Edit {
|
||||
let position = edit.position;
|
||||
let removed_content = &input[position..(position + edit.deleted_length)];
|
||||
Edit {
|
||||
position,
|
||||
deleted_length: edit.inserted_text.len(),
|
||||
inserted_text: removed_content.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_random_edit(rand: &mut Rand, input: &Vec<u8>) -> Edit {
|
||||
let choice = rand.unsigned(10);
|
||||
if choice < 2 {
|
||||
// Insert text at end
|
||||
let inserted_text = rand.words(3);
|
||||
Edit {
|
||||
position: input.len(),
|
||||
deleted_length: 0,
|
||||
inserted_text,
|
||||
}
|
||||
} else if choice < 5 {
|
||||
// Delete text from the end
|
||||
let mut deleted_length = rand.unsigned(10);
|
||||
if deleted_length > input.len() {
|
||||
deleted_length = input.len();
|
||||
}
|
||||
Edit {
|
||||
position: input.len() - deleted_length,
|
||||
deleted_length,
|
||||
inserted_text: vec![],
|
||||
}
|
||||
} else if choice < 8 {
|
||||
// Insert at a random position
|
||||
let position = rand.unsigned(input.len());
|
||||
let word_count = 1 + rand.unsigned(3);
|
||||
let inserted_text = rand.words(word_count);
|
||||
Edit {
|
||||
position,
|
||||
deleted_length: 0,
|
||||
inserted_text,
|
||||
}
|
||||
} else {
|
||||
// Replace at random position
|
||||
let position = rand.unsigned(input.len());
|
||||
let deleted_length = rand.unsigned(input.len() - position);
|
||||
let word_count = 1 + rand.unsigned(3);
|
||||
let inserted_text = rand.words(word_count);
|
||||
Edit {
|
||||
position,
|
||||
deleted_length,
|
||||
inserted_text,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
|
||||
let mut result = Point { row: 0, column: 0 };
|
||||
for c in &input[0..offset] {
|
||||
if *c as char == '\n' {
|
||||
result.row += 1;
|
||||
result.column = 0;
|
||||
} else {
|
||||
result.column += 1;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
42
cli/src/tests/helpers/fixtures.rs
Normal file
42
cli/src/tests/helpers/fixtures.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
use crate::loader::Loader;
|
||||
use lazy_static::lazy_static;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tree_sitter::Language;
|
||||
|
||||
include!("./dirs.rs");
|
||||
|
||||
lazy_static! {
    // Loader shared by all tests; it compiles and caches fixture languages
    // inside the scratch directory.
    static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
}
|
||||
|
||||
pub fn fixtures_dir<'a>() -> &'static Path {
|
||||
&FIXTURES_DIR
|
||||
}
|
||||
|
||||
pub fn get_language(name: &str) -> Language {
|
||||
TEST_LOADER
|
||||
.load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
|
||||
let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name));
|
||||
if !fs::read_to_string(&parser_c_path)
|
||||
.map(|content| content == parser_code)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
fs::write(&parser_c_path, parser_code).unwrap();
|
||||
}
|
||||
let scanner_path = path.and_then(|p| {
|
||||
let result = p.join("scanner.c");
|
||||
if result.exists() {
|
||||
Some(result)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
TEST_LOADER
|
||||
.load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
|
||||
.unwrap()
|
||||
}
|
||||
5
cli/src/tests/helpers/mod.rs
Normal file
5
cli/src/tests/helpers/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
// Shared test utilities, visible only to the sibling test modules.
pub(super) mod allocations;
pub(super) mod fixtures;
pub(super) mod random;
pub(super) mod scope_sequence;
pub(super) mod edits;
|
||||
41
cli/src/tests/helpers/random.rs
Normal file
41
cli/src/tests/helpers/random.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use rand::distributions::Alphanumeric;
|
||||
use rand::prelude::{Rng, SeedableRng, SmallRng};
|
||||
|
||||
const OPERATORS: &[char] = &[
|
||||
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.',
|
||||
];
|
||||
|
||||
pub struct Rand(SmallRng);
|
||||
|
||||
impl Rand {
|
||||
pub fn new(seed: usize) -> Self {
|
||||
Rand(SmallRng::seed_from_u64(seed as u64))
|
||||
}
|
||||
|
||||
pub fn unsigned(&mut self, max: usize) -> usize {
|
||||
self.0.gen_range(0, max + 1)
|
||||
}
|
||||
|
||||
pub fn words(&mut self, max_count: usize) -> Vec<u8> {
|
||||
let mut result = Vec::new();
|
||||
let word_count = self.unsigned(max_count);
|
||||
for i in 0..word_count {
|
||||
if i > 0 {
|
||||
if self.unsigned(5) == 0 {
|
||||
result.push('\n' as u8);
|
||||
} else {
|
||||
result.push(' ' as u8);
|
||||
}
|
||||
}
|
||||
if self.unsigned(3) == 0 {
|
||||
let index = self.unsigned(OPERATORS.len() - 1);
|
||||
result.push(OPERATORS[index] as u8);
|
||||
} else {
|
||||
for _ in 0..self.unsigned(8) {
|
||||
result.push(self.0.sample(Alphanumeric) as u8);
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
97
cli/src/tests/helpers/scope_sequence.rs
Normal file
97
cli/src/tests/helpers/scope_sequence.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
use tree_sitter::{Point, Range, Tree};
|
||||
|
||||
/// For each byte of a source text, the stack of syntax-node kinds containing
/// that byte. Used to compare tree structure between an incrementally
/// reparsed tree and a freshly parsed one.
#[derive(Debug)]
pub struct ScopeSequence(Vec<ScopeStack>);

/// The node kinds enclosing a single byte, outermost first.
type ScopeStack = Vec<&'static str>;

impl ScopeSequence {
    /// Builds the per-byte scope stacks for `tree` by walking it with a cursor.
    pub fn new(tree: &Tree) -> Self {
        let mut result = ScopeSequence(Vec::new());
        let mut scope_stack = Vec::new();

        let mut cursor = tree.walk();
        let mut visited_children = false;
        loop {
            let node = cursor.node();
            // Fill bytes up to this node's start with the enclosing scopes
            // (covers gaps between siblings, e.g. whitespace).
            for _ in result.0.len()..node.start_byte() {
                result.0.push(scope_stack.clone());
            }
            if visited_children {
                // Leaving the node: fill its remaining bytes, then pop it.
                for _ in result.0.len()..node.end_byte() {
                    result.0.push(scope_stack.clone());
                }
                scope_stack.pop();
                if cursor.goto_next_sibling() {
                    visited_children = false;
                } else if !cursor.goto_parent() {
                    break;
                }
            } else {
                // Entering the node: push its kind, then descend if possible.
                scope_stack.push(cursor.node().kind());
                if !cursor.goto_first_child() {
                    visited_children = true;
                }
            }
        }

        result
    }

    /// Compares `self` (old tree) against `other` (new tree). Any position
    /// whose scopes differ must lie inside one of `known_changed_ranges`
    /// (the ranges the reparse reported as invalidated); otherwise an error
    /// describing the first offending position is returned.
    pub fn check_changes(
        &self,
        other: &ScopeSequence,
        text: &Vec<u8>,
        known_changed_ranges: &Vec<Range>,
    ) -> Result<(), String> {
        // Sanity check: one scope stack per byte of text.
        if self.0.len() != text.len() {
            panic!(
                "Inconsistent scope sequence: {:?}",
                self.0.iter().zip(text.iter().map(|c| *c as char)).collect::<Vec<_>>()
            );
        }

        assert_eq!(self.0.len(), other.0.len());
        let mut position = Point { row: 0, column: 0 };
        for (i, stack) in self.0.iter().enumerate() {
            let other_stack = &other.0[i];
            if *stack != *other_stack {
                // A difference is acceptable only inside a reported range.
                let containing_range = known_changed_ranges
                    .iter()
                    .find(|range| range.start_point <= position && position < range.end_point);
                if containing_range.is_none() {
                    // Extract the line containing byte `i` for the report;
                    // `i - position.column` is the offset of the line start.
                    let line = &text[(i - position.column)..]
                        .split(|c| *c == '\n' as u8)
                        .next()
                        .unwrap();
                    return Err(format!(
                        concat!(
                            "Position: {}\n",
                            "Byte offset: {}\n",
                            "Line: {}\n",
                            "{}^\n",
                            "Old scopes: {:?}\n",
                            "New scopes: {:?}\n",
                            "Invalidated ranges: {:?}",
                        ),
                        position,
                        i,
                        String::from_utf8_lossy(line),
                        String::from(" ").repeat(position.column + "Line: ".len()),
                        stack,
                        other_stack,
                        known_changed_ranges,
                    ));
                }
            }

            // Advance the row/column position past this byte.
            if text[i] == '\n' as u8 {
                position.row += 1;
                position.column = 0;
            } else {
                position.column += 1;
            }
        }
        Ok(())
    }
}
|
||||
6
cli/src/tests/mod.rs
Normal file
6
cli/src/tests/mod.rs
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
// Test suite for the tree-sitter CLI and its Rust binding.
mod corpus_test;
mod helpers;
mod node_test;
mod parser_test;
mod properties_test;
mod tree_test;
|
||||
364
cli/src/tests/node_test.rs
Normal file
364
cli/src/tests/node_test.rs
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
use super::helpers::fixtures::{get_language, get_test_language};
|
||||
use super::helpers::random::Rand;
|
||||
use super::helpers::edits::{get_random_edit, perform_edit};
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use tree_sitter::{Node, Parser, Point, Tree};
|
||||
|
||||
// A small JSON document used by most tests in this module. The assertions
// below depend on its exact layout (byte offsets and row/column positions),
// so its whitespace must not be changed.
const JSON_EXAMPLE: &'static str = r#"

[
  123,
  false,
  {
    "x": null
  }
]
"#;

// A grammar where rule "b" also appears under the alias "B", the hidden rule
// "_c" appears as "C", and "comment" is an extra — for testing how aliases
// and extras are exposed in the node tree.
const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &'static str = r#"{
    "name": "aliases_and_extras",

    "extras": [
        {"type": "PATTERN", "value": "\\s+"},
        {"type": "SYMBOL", "name": "comment"}
    ],

    "rules": {
        "a": {
            "type": "SEQ",
            "members": [
                {"type": "SYMBOL", "name": "b"},
                {
                    "type": "ALIAS",
                    "value": "B",
                    "named": true,
                    "content": {"type": "SYMBOL", "name": "b"}
                },
                {
                    "type": "ALIAS",
                    "value": "C",
                    "named": true,
                    "content": {"type": "SYMBOL", "name": "_c"}
                }
            ]
        },

        "b": {"type": "STRING", "value": "b"},

        "_c": {"type": "STRING", "value": "c"},

        "comment": {"type": "STRING", "value": "..."}
    }
}"#;
|
||||
|
||||
/// Verifies `child`, `child_count`, byte ranges, positions, named-ness, and
/// `parent` for every node of the parsed JSON example.
#[test]
fn test_node_child() {
    let tree = parse_json_example();
    let array_node = tree.root_node().child(0).unwrap();

    assert_eq!(array_node.kind(), "array");
    assert_eq!(array_node.named_child_count(), 3);
    assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find("[").unwrap());
    assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find("]").unwrap() + 1);
    assert_eq!(array_node.start_position(), Point::new(2, 0));
    assert_eq!(array_node.end_position(), Point::new(8, 1));
    assert_eq!(array_node.child_count(), 7);

    // `child` includes anonymous tokens (brackets, commas) as well as named nodes.
    let left_bracket_node = array_node.child(0).unwrap();
    let number_node = array_node.child(1).unwrap();
    let comma_node1 = array_node.child(2).unwrap();
    let false_node = array_node.child(3).unwrap();
    let comma_node2 = array_node.child(4).unwrap();
    let object_node = array_node.child(5).unwrap();
    let right_bracket_node = array_node.child(6).unwrap();

    assert_eq!(left_bracket_node.kind(), "[");
    assert_eq!(number_node.kind(), "number");
    assert_eq!(comma_node1.kind(), ",");
    assert_eq!(false_node.kind(), "false");
    assert_eq!(comma_node2.kind(), ",");
    assert_eq!(object_node.kind(), "object");
    assert_eq!(right_bracket_node.kind(), "]");

    // Punctuation tokens are anonymous; rule nodes are named.
    assert_eq!(left_bracket_node.is_named(), false);
    assert_eq!(number_node.is_named(), true);
    assert_eq!(comma_node1.is_named(), false);
    assert_eq!(false_node.is_named(), true);
    assert_eq!(comma_node2.is_named(), false);
    assert_eq!(object_node.is_named(), true);
    assert_eq!(right_bracket_node.is_named(), false);

    assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap());
    assert_eq!(
        number_node.end_byte(),
        JSON_EXAMPLE.find("123").unwrap() + 3
    );
    assert_eq!(number_node.start_position(), Point::new(3, 2));
    assert_eq!(number_node.end_position(), Point::new(3, 5));

    assert_eq!(false_node.start_byte(), JSON_EXAMPLE.find("false").unwrap());
    assert_eq!(
        false_node.end_byte(),
        JSON_EXAMPLE.find("false").unwrap() + 5
    );
    assert_eq!(false_node.start_position(), Point::new(4, 2));
    assert_eq!(false_node.end_position(), Point::new(4, 7));

    assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap());
    assert_eq!(object_node.start_position(), Point::new(5, 2));
    assert_eq!(object_node.end_position(), Point::new(7, 3));

    // Descend into the object: "{", pair, "}".
    assert_eq!(object_node.child_count(), 3);
    let left_brace_node = object_node.child(0).unwrap();
    let pair_node = object_node.child(1).unwrap();
    let right_brace_node = object_node.child(2).unwrap();

    assert_eq!(left_brace_node.kind(), "{");
    assert_eq!(pair_node.kind(), "pair");
    assert_eq!(right_brace_node.kind(), "}");

    assert_eq!(left_brace_node.is_named(), false);
    assert_eq!(pair_node.is_named(), true);
    assert_eq!(right_brace_node.is_named(), false);

    assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap());
    assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4);
    assert_eq!(pair_node.start_position(), Point::new(6, 4));
    assert_eq!(pair_node.end_position(), Point::new(6, 13));

    // Descend into the pair: string, ":", null.
    assert_eq!(pair_node.child_count(), 3);
    let string_node = pair_node.child(0).unwrap();
    let colon_node = pair_node.child(1).unwrap();
    let null_node = pair_node.child(2).unwrap();

    assert_eq!(string_node.kind(), "string");
    assert_eq!(colon_node.kind(), ":");
    assert_eq!(null_node.kind(), "null");

    assert_eq!(string_node.is_named(), true);
    assert_eq!(colon_node.is_named(), false);
    assert_eq!(null_node.is_named(), true);

    assert_eq!(
        string_node.start_byte(),
        JSON_EXAMPLE.find("\"x\"").unwrap()
    );
    assert_eq!(
        string_node.end_byte(),
        JSON_EXAMPLE.find("\"x\"").unwrap() + 3
    );
    assert_eq!(string_node.start_position(), Point::new(6, 4));
    assert_eq!(string_node.end_position(), Point::new(6, 7));

    assert_eq!(null_node.start_byte(), JSON_EXAMPLE.find("null").unwrap());
    assert_eq!(null_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4);
    assert_eq!(null_node.start_position(), Point::new(6, 9));
    assert_eq!(null_node.end_position(), Point::new(6, 13));

    // `parent` walks back up to the root, whose own parent is None.
    assert_eq!(string_node.parent().unwrap(), pair_node);
    assert_eq!(null_node.parent().unwrap(), pair_node);
    assert_eq!(pair_node.parent().unwrap(), object_node);
    assert_eq!(number_node.parent().unwrap(), array_node);
    assert_eq!(false_node.parent().unwrap(), array_node);
    assert_eq!(object_node.parent().unwrap(), array_node);
    assert_eq!(array_node.parent().unwrap(), tree.root_node());
    assert_eq!(tree.root_node().parent(), None);
}
|
||||
|
||||
/// Verifies `named_child` / `named_child_count`: indexing skips anonymous
/// tokens such as brackets, commas, and colons.
#[test]
fn test_node_named_child() {
    let tree = parse_json_example();
    let array_node = tree.root_node().child(0).unwrap();

    // Named children of the array are just the three values.
    let number_node = array_node.named_child(0).unwrap();
    let false_node = array_node.named_child(1).unwrap();
    let object_node = array_node.named_child(2).unwrap();

    assert_eq!(number_node.kind(), "number");
    assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap());
    assert_eq!(
        number_node.end_byte(),
        JSON_EXAMPLE.find("123").unwrap() + 3
    );
    assert_eq!(number_node.start_position(), Point::new(3, 2));
    assert_eq!(number_node.end_position(), Point::new(3, 5));

    assert_eq!(false_node.kind(), "false");
    assert_eq!(false_node.start_byte(), JSON_EXAMPLE.find("false").unwrap());
    assert_eq!(
        false_node.end_byte(),
        JSON_EXAMPLE.find("false").unwrap() + 5
    );
    assert_eq!(false_node.start_position(), Point::new(4, 2));
    assert_eq!(false_node.end_position(), Point::new(4, 7));

    assert_eq!(object_node.kind(), "object");
    assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap());
    assert_eq!(object_node.start_position(), Point::new(5, 2));
    assert_eq!(object_node.end_position(), Point::new(7, 3));

    // The object's only named child is the pair; braces are skipped.
    assert_eq!(object_node.named_child_count(), 1);

    let pair_node = object_node.named_child(0).unwrap();
    assert_eq!(pair_node.kind(), "pair");
    assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap());
    assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4);
    assert_eq!(pair_node.start_position(), Point::new(6, 4));
    assert_eq!(pair_node.end_position(), Point::new(6, 13));

    // The pair's named children skip the ":" token.
    let string_node = pair_node.named_child(0).unwrap();
    let null_node = pair_node.named_child(1).unwrap();

    assert_eq!(string_node.kind(), "string");
    assert_eq!(null_node.kind(), "null");

    assert_eq!(
        string_node.start_byte(),
        JSON_EXAMPLE.find("\"x\"").unwrap()
    );
    assert_eq!(
        string_node.end_byte(),
        JSON_EXAMPLE.find("\"x\"").unwrap() + 3
    );
    assert_eq!(string_node.start_position(), Point::new(6, 4));
    assert_eq!(string_node.end_position(), Point::new(6, 7));

    assert_eq!(null_node.start_byte(), JSON_EXAMPLE.find("null").unwrap());
    assert_eq!(null_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4);
    assert_eq!(null_node.start_position(), Point::new(6, 9));
    assert_eq!(null_node.end_position(), Point::new(6, 13));

    // `parent` is unaffected by named/anonymous indexing.
    assert_eq!(string_node.parent().unwrap(), pair_node);
    assert_eq!(null_node.parent().unwrap(), pair_node);
    assert_eq!(pair_node.parent().unwrap(), object_node);
    assert_eq!(number_node.parent().unwrap(), array_node);
    assert_eq!(false_node.parent().unwrap(), array_node);
    assert_eq!(object_node.parent().unwrap(), array_node);
    assert_eq!(array_node.parent().unwrap(), tree.root_node());
    assert_eq!(tree.root_node().parent(), None);
}
|
||||
|
||||
/// Verifies that aliased rules appear under their aliased kinds ("B", "C")
/// and that "comment" extras are interleaved as named children.
#[test]
fn test_node_named_child_with_aliases_and_extras() {
    // Generate and load a parser for the alias/extras grammar defined above.
    let (parser_name, parser_code) =
        generate_parser_for_grammar(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();

    let mut parser = Parser::new();
    parser
        .set_language(get_test_language(&parser_name, &parser_code, None))
        .unwrap();

    let tree = parser.parse_str("b ... b ... c", None).unwrap();
    let root = tree.root_node();
    assert_eq!(root.to_sexp(), "(a (b) (comment) (B) (comment) (C))");
    assert_eq!(root.named_child_count(), 5);
    assert_eq!(root.named_child(0).unwrap().kind(), "b");
    assert_eq!(root.named_child(1).unwrap().kind(), "comment");
    assert_eq!(root.named_child(2).unwrap().kind(), "B");
    assert_eq!(root.named_child(3).unwrap().kind(), "comment");
    assert_eq!(root.named_child(4).unwrap().kind(), "C");
}
|
||||
|
||||
/// Verifies `descendant_for_byte_range` and `named_descendant_for_byte_range`:
/// each returns the smallest (named) node spanning the given byte range.
#[test]
fn test_node_descendant_for_range() {
    let tree = parse_json_example();
    let array_node = tree.root_node().child(0).unwrap();

    // An empty range on the colon resolves to the ":" token itself.
    let colon_index = JSON_EXAMPLE.find(":").unwrap();
    let node1 = array_node
        .descendant_for_byte_range(colon_index, colon_index)
        .unwrap();
    assert_eq!(node1.kind(), ":");
    assert_eq!(node1.start_byte(), colon_index);
    assert_eq!(node1.end_byte(), colon_index + 1);
    assert_eq!(node1.start_position(), Point::new(6, 7));
    assert_eq!(node1.end_position(), Point::new(6, 8));

    // A range straddling the string and the colon resolves to their common
    // ancestor, the pair.
    let string_index = JSON_EXAMPLE.find("\"x\"").unwrap();
    let node2 = array_node
        .descendant_for_byte_range(string_index + 2, string_index + 4)
        .unwrap();
    assert_eq!(node2.kind(), "pair");
    assert_eq!(node2.start_byte(), string_index);
    assert_eq!(node2.end_byte(), string_index + 9);
    assert_eq!(node2.start_position(), Point::new(6, 4));
    assert_eq!(node2.end_position(), Point::new(6, 13));

    assert_eq!(node1.parent(), Some(node2));

    // The named variant skips anonymous tokens when choosing the result.
    let node3 = array_node
        .named_descendant_for_byte_range(string_index, string_index + 2)
        .unwrap();
    assert_eq!(node3.kind(), "string");
    assert_eq!(node3.start_byte(), string_index);
    assert_eq!(node3.end_byte(), string_index + 3);

    // no leaf spans the given range - return the smallest node that does span it.
    let node4 = array_node
        .named_descendant_for_byte_range(string_index, string_index + 3)
        .unwrap();
    assert_eq!(node4.kind(), "pair");
    assert_eq!(node4.start_byte(), string_index);
    assert_eq!(node4.end_byte(), string_index + 9);
}
|
||||
|
||||
/// Applies random edits and verifies that `Node::edit` adjusts stale nodes
/// from the pre-edit tree to match the corresponding nodes of the edited tree.
#[test]
fn test_node_edit() {
    let mut code = JSON_EXAMPLE.as_bytes().to_vec();
    let mut tree = parse_json_example();
    let mut rand = Rand::new(0);

    for _ in 0..10 {
        // Capture all nodes before the edit, then edit a clone of the tree.
        let mut nodes_before = get_all_nodes(&tree);

        let edit = get_random_edit(&mut rand, &mut code);
        let mut tree2 = tree.clone();
        let edit = perform_edit(&mut tree2, &mut code, &edit);
        // Adjust the stale nodes by the same edit.
        for node in nodes_before.iter_mut() {
            node.edit(&edit);
        }

        // Each adjusted node must agree with the corresponding node of the
        // edited tree on kind and start location.
        let nodes_after = get_all_nodes(&tree2);
        for (i, node) in nodes_before.into_iter().enumerate() {
            assert_eq!(
                (
                    node.kind(),
                    node.start_byte(),
                    node.start_position()
                ),
                (
                    nodes_after[i].kind(),
                    nodes_after[i].start_byte(),
                    nodes_after[i].start_position()
                ),
            );
        }

        tree = tree2;
    }
}
|
||||
|
||||
/// Collects the nodes of `tree` by walking it with a cursor.
///
/// Note: a node is pushed on *every* visit by the cursor, so parent nodes
/// appear again each time the walk returns to them. This is fine for the
/// callers here, which only compare the sequences of two trees pairwise.
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
    let mut result = Vec::new();
    let mut visited_children = false;
    let mut cursor = tree.walk();
    loop {
        result.push(cursor.node());
        if !visited_children && cursor.goto_first_child() {
            continue;
        } else if cursor.goto_next_sibling() {
            visited_children = false;
        } else if cursor.goto_parent() {
            visited_children = true;
        } else {
            break;
        }
    }
    // Idiomatic tail expression instead of an explicit `return`.
    result
}
|
||||
|
||||
fn parse_json_example() -> Tree {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("json")).unwrap();
|
||||
parser.parse_str(JSON_EXAMPLE, None).unwrap()
|
||||
}
|
||||
732
cli/src/tests/parser_test.rs
Normal file
732
cli/src/tests/parser_test.rs
Normal file
|
|
@ -0,0 +1,732 @@
|
|||
use super::helpers::edits::{perform_edit, Edit, ReadRecorder};
|
||||
use super::helpers::fixtures::{get_language, get_test_language};
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use std::{thread, usize};
|
||||
use tree_sitter::{InputEdit, LogType, Parser, Point, Range};
|
||||
|
||||
/// Smoke test: parse a small Rust snippet and check the root kind, the full
/// s-expression, and the first child.
#[test]
fn test_basic_parsing() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    let tree = parser
        .parse_str(
            "
        struct Stuff {}
        fn main() {}
    ",
            None,
        )
        .unwrap();

    let root_node = tree.root_node();
    assert_eq!(root_node.kind(), "source_file");

    assert_eq!(
        root_node.to_sexp(),
        "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))"
    );

    let struct_node = root_node.child(0).unwrap();
    assert_eq!(struct_node.kind(), "struct_item");
}
|
||||
|
||||
/// Verifies that a logger installed with `set_logger` receives both
/// parse-level and lex-level messages during a parse.
#[test]
fn test_parsing_with_logging() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    // Collect every log message emitted during the parse.
    let mut messages = Vec::new();
    parser.set_logger(Some(Box::new(|log_type, message| {
        messages.push((log_type, message.to_string()));
    })));

    parser
        .parse_str(
            "
        struct Stuff {}
        fn main() {}
    ",
            None,
        )
        .unwrap();

    // Both a reduce action (parse log) and a skipped-character (lex log)
    // should have been reported.
    assert!(messages.contains(&(
        LogType::Parse,
        "reduce sym:struct_item, child_count:3".to_string()
    )));
    assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string())));
}
|
||||
|
||||
/// Parses via the chunked UTF-8 input callback: text is served line by line,
/// with "\n" synthesized at the end of each line.
#[test]
fn test_parsing_with_custom_utf8_input() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    let lines = &["pub fn foo() {", "  1", "}"];

    let tree = parser
        .parse_utf8(
            &mut |_, position| {
                let row = position.row;
                let column = position.column;
                if row < lines.len() {
                    if column < lines[row].as_bytes().len() {
                        // Serve the remainder of the current line.
                        &lines[row].as_bytes()[column..]
                    } else {
                        // Past the line's end: synthesize the newline.
                        "\n".as_bytes()
                    }
                } else {
                    // Past the last line: end of input.
                    &[]
                }
            },
            None,
        )
        .unwrap();

    let root = tree.root_node();
    assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
    assert_eq!(root.kind(), "source_file");
    assert_eq!(root.has_error(), false);
    assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
|
||||
|
||||
/// Same as the UTF-8 test above, but through the chunked UTF-16 input
/// callback, with lines pre-encoded as UTF-16 code units.
#[test]
fn test_parsing_with_custom_utf16_input() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    let lines: Vec<Vec<u16>> = ["pub fn foo() {", "  1", "}"]
        .iter()
        .map(|s| s.encode_utf16().collect())
        .collect();

    let tree = parser
        .parse_utf16(
            &mut |_, position| {
                let row = position.row;
                let column = position.column;
                if row < lines.len() {
                    if column < lines[row].len() {
                        &lines[row][column..]
                    } else {
                        // 10 is '\n' as a UTF-16 code unit.
                        &[10]
                    }
                } else {
                    &[]
                }
            },
            None,
        )
        .unwrap();

    let root = tree.root_node();
    assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
    assert_eq!(root.kind(), "source_file");
    assert_eq!(root.has_error(), false);
    assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
|
||||
|
||||
// Incremental parsing
|
||||
|
||||
/// Edits near the start of the code, reparses incrementally, and verifies
/// that only the text around the edit is re-read.
#[test]
fn test_parsing_after_editing_beginning_of_code() {
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();

    let mut code = b"123 + 456 * (10 + x);".to_vec();
    let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (binary_expression ",
            "(number) ",
            "(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier)))))))",
        )
    );

    // Insert " || 5" right after the first number.
    perform_edit(
        &mut tree,
        &mut code,
        &Edit {
            position: 3,
            deleted_length: 0,
            inserted_text: b" || 5".to_vec(),
        },
    );

    // Reparse incrementally while recording which bytes are read.
    let mut recorder = ReadRecorder::new(&code);
    let tree = parser
        .parse_utf8(&mut |i, _| recorder.read(i), Some(&tree))
        .unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (binary_expression ",
            "(number) ",
            "(binary_expression ",
            "(number) ",
            "(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier))))))))",
        )
    );

    // Only the edited region (plus a little lookahead) was re-read.
    assert_eq!(recorder.strings_read(), vec!["123 || 5 "]);
}
|
||||
|
||||
/// Edits near the end of the code, reparses incrementally, and verifies that
/// only the text around the edit is re-read.
#[test]
fn test_parsing_after_editing_end_of_code() {
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();

    let mut code = b"x * (100 + abc);".to_vec();
    let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (binary_expression ",
            "(identifier) ",
            "(parenthesized_expression (binary_expression (number) (identifier))))))",
        )
    );

    // Append ".d" to the identifier `abc`, just before the ");".
    let position = code.len() - 2;
    perform_edit(
        &mut tree,
        &mut code,
        &Edit {
            position,
            deleted_length: 0,
            inserted_text: b".d".to_vec(),
        },
    );

    // Reparse incrementally while recording which bytes are read.
    let mut recorder = ReadRecorder::new(&code);
    let tree = parser
        .parse_utf8(&mut |i, _| recorder.read(i), Some(&tree))
        .unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (binary_expression ",
            "(identifier) ",
            "(parenthesized_expression (binary_expression (number) (member_expression (identifier) (property_identifier)))))))"
        )
    );

    // Only text near the operator and the edited identifier was re-read.
    assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]);
}
|
||||
|
||||
// Thread safety
|
||||
|
||||
#[test]
fn test_parsing_on_multiple_threads() {
    // Parse this source file so that each thread has a non-trivial amount of
    // work to do.
    let this_file_source = include_str!("parser_test.rs");

    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();
    let tree = parser.parse_str(this_file_source, None).unwrap();

    let mut parse_threads = Vec::new();
    for thread_id in 1..5 {
        let mut tree_clone = tree.clone();
        parse_threads.push(thread::spawn(move || {
            // For each thread, prepend a different number of declarations to the
            // source code.
            let mut prepend_line_count = 0;
            let mut prepended_source = String::new();
            for _ in 0..thread_id {
                prepend_line_count += 2;
                prepended_source += "struct X {}\n\n";
            }

            tree_clone.edit(&InputEdit {
                start_byte: 0,
                old_end_byte: 0,
                new_end_byte: prepended_source.len(),
                start_position: Point::new(0, 0),
                old_end_position: Point::new(0, 0),
                new_end_position: Point::new(prepend_line_count, 0),
            });
            prepended_source += this_file_source;

            // Reparse using the old tree as a starting point.
            let mut parser = Parser::new();
            parser.set_language(get_language("rust")).unwrap();
            parser
                .parse_str(&prepended_source, Some(&tree_clone))
                .unwrap()
        }));
    }

    // Check that the trees have the expected relationship to one another:
    // thread N prepended N extra top-level items.
    let trees = parse_threads
        .into_iter()
        .map(|thread| thread.join().unwrap());
    let child_count_differences = trees
        .map(|t| t.root_node().child_count() - tree.root_node().child_count())
        .collect::<Vec<_>>();

    assert_eq!(child_count_differences, &[1, 2, 3, 4]);
}
|
||||
|
||||
// Operation limits
|
||||
|
||||
#[test]
fn test_parsing_with_an_operation_limit() {
    let mut parser = Parser::new();
    parser.set_language(get_language("json")).unwrap();

    // Start parsing from an infinite input. Parsing should abort after 5 "operations".
    parser.set_operation_limit(5);
    let mut call_count = 0;
    let tree = parser.parse_utf8(
        &mut |_, _| {
            if call_count == 0 {
                call_count += 1;
                b"[0"
            } else {
                call_count += 1;
                b", 0"
            }
        },
        None,
    );
    assert!(tree.is_none());
    assert!(call_count >= 3);
    assert!(call_count <= 8);

    // Resume parsing from the previous state, now feeding a finite document.
    call_count = 0;
    parser.set_operation_limit(20);
    let tree = parser
        .parse_utf8(
            &mut |_, _| {
                if call_count == 0 {
                    call_count += 1;
                    b"]"
                } else {
                    b""
                }
            },
            None,
        )
        .unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        "(value (array (number) (number) (number)))"
    );
}
|
||||
|
||||
#[test]
fn test_parsing_with_a_reset_after_reaching_an_operation_limit() {
    let mut parser = Parser::new();
    parser.set_language(get_language("json")).unwrap();

    parser.set_operation_limit(3);
    let tree = parser.parse_str("[1234, 5, 6, 7, 8]", None);
    assert!(tree.is_none());

    // Without calling reset, the parser continues from where it left off, so
    // it does not see the changes to the beginning of the source code.
    parser.set_operation_limit(usize::MAX);
    let tree = parser.parse_str("[null, 5, 6, 4, 5]", None).unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        "(value (array (number) (number) (number) (number) (number)))"
    );

    parser.set_operation_limit(3);
    let tree = parser.parse_str("[1234, 5, 6, 7, 8]", None);
    assert!(tree.is_none());

    // By calling reset, we force the parser to start over from scratch so
    // that it sees the changes to the beginning of the source code.
    parser.set_operation_limit(usize::MAX);
    parser.reset();
    let tree = parser.parse_str("[null, 5, 6, 4, 5]", None).unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        "(value (array (null) (number) (number) (number) (number)))"
    );
}
|
||||
|
||||
// Included Ranges
|
||||
|
||||
#[test]
fn test_parsing_with_one_included_range() {
    let source_code = "<span>hi</span><script>console.log('sup');</script>";

    // First parse the whole document as HTML to locate the script's raw text.
    let mut parser = Parser::new();
    parser.set_language(get_language("html")).unwrap();
    let html_tree = parser.parse_str(source_code, None).unwrap();
    let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap();
    assert_eq!(script_content_node.kind(), "raw_text");

    // Then parse only that range as JavaScript.
    parser.set_included_ranges(&[script_content_node.range()]);
    parser.set_language(get_language("javascript")).unwrap();
    let js_tree = parser.parse_str(source_code, None).unwrap();

    assert_eq!(
        js_tree.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (call_expression",
            " (member_expression (identifier) (property_identifier))",
            " (arguments (string)))))",
        )
    );
    assert_eq!(
        js_tree.root_node().start_position(),
        Point::new(0, source_code.find("console").unwrap())
    );
}
|
||||
|
||||
#[test]
fn test_parsing_with_multiple_included_ranges() {
    let source_code = "html `<div>Hello, ${name.toUpperCase()}, it's <b>${now()}</b>.</div>`";

    // Parse as JavaScript first to find the template string's pieces.
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    let js_tree = parser.parse_str(source_code, None).unwrap();
    let template_string_node = js_tree
        .root_node()
        .descendant_for_byte_range(
            source_code.find("<div>").unwrap(),
            source_code.find("Hello").unwrap(),
        )
        .unwrap();
    assert_eq!(template_string_node.kind(), "template_string");

    let open_quote_node = template_string_node.child(0).unwrap();
    let interpolation_node1 = template_string_node.child(1).unwrap();
    let interpolation_node2 = template_string_node.child(2).unwrap();
    let close_quote_node = template_string_node.child(3).unwrap();

    // Parse the template's literal segments (between the interpolations) as HTML.
    parser.set_language(get_language("html")).unwrap();
    parser.set_included_ranges(&[
        Range {
            start_byte: open_quote_node.end_byte(),
            start_point: open_quote_node.end_position(),
            end_byte: interpolation_node1.start_byte(),
            end_point: interpolation_node1.start_position(),
        },
        Range {
            start_byte: interpolation_node1.end_byte(),
            start_point: interpolation_node1.end_position(),
            end_byte: interpolation_node2.start_byte(),
            end_point: interpolation_node2.start_position(),
        },
        Range {
            start_byte: interpolation_node2.end_byte(),
            start_point: interpolation_node2.end_position(),
            end_byte: close_quote_node.start_byte(),
            end_point: close_quote_node.start_position(),
        },
    ]);
    let html_tree = parser.parse_str(source_code, None).unwrap();

    assert_eq!(
        html_tree.root_node().to_sexp(),
        concat!(
            "(fragment (element",
            " (start_tag (tag_name))",
            " (text)",
            " (element (start_tag (tag_name)) (end_tag (tag_name)))",
            " (text)",
            " (end_tag (tag_name))))",
        )
    );

    let div_element_node = html_tree.root_node().child(0).unwrap();
    let hello_text_node = div_element_node.child(1).unwrap();
    let b_element_node = div_element_node.child(2).unwrap();
    let b_start_tag_node = b_element_node.child(0).unwrap();
    let b_end_tag_node = b_element_node.child(1).unwrap();

    // The HTML nodes' byte ranges should line up with positions in the
    // original combined document, spanning across the excluded gaps.
    assert_eq!(hello_text_node.kind(), "text");
    assert_eq!(
        hello_text_node.start_byte(),
        source_code.find("Hello").unwrap()
    );
    assert_eq!(hello_text_node.end_byte(), source_code.find("<b>").unwrap());

    assert_eq!(b_start_tag_node.kind(), "start_tag");
    assert_eq!(
        b_start_tag_node.start_byte(),
        source_code.find("<b>").unwrap()
    );
    assert_eq!(
        b_start_tag_node.end_byte(),
        source_code.find("${now()}").unwrap()
    );

    assert_eq!(b_end_tag_node.kind(), "end_tag");
    assert_eq!(
        b_end_tag_node.start_byte(),
        source_code.find("</b>").unwrap()
    );
    assert_eq!(
        b_end_tag_node.end_byte(),
        source_code.find(".</div>").unwrap()
    );
}
|
||||
|
||||
#[test]
fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() {
    let source_code = "<script>a.</script>";
    // Widen each ASCII byte into a UTF-16 code unit.
    let utf16_source_code: Vec<u16> = source_code.as_bytes().iter().map(|c| *c as u16).collect();

    // Byte offsets are doubled because each code unit occupies two bytes.
    let start_byte = 2 * source_code.find("a.").unwrap();
    let end_byte = 2 * source_code.find("</script>").unwrap();

    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    parser.set_included_ranges(&[Range {
        start_byte,
        end_byte,
        start_point: Point::new(0, start_byte),
        end_point: Point::new(0, end_byte),
    }]);
    let tree = parser
        .parse_utf16(&mut |i, _| &utf16_source_code[i..], None)
        .unwrap();
    // The dangling `a.` is an error, contained within the included range.
    assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))");
}
|
||||
|
||||
#[test]
fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() {
    let source_code = "a <%= b() %> c <% d() %>";
    let range1_start_byte = source_code.find(" b() ").unwrap();
    let range1_end_byte = range1_start_byte + " b() ".len();
    let range2_start_byte = source_code.find(" d() ").unwrap();
    let range2_end_byte = range2_start_byte + " d() ".len();

    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    parser.set_included_ranges(&[
        Range {
            start_byte: range1_start_byte,
            end_byte: range1_end_byte,
            start_point: Point::new(0, range1_start_byte),
            end_point: Point::new(0, range1_end_byte),
        },
        Range {
            start_byte: range2_start_byte,
            end_byte: range2_end_byte,
            start_point: Point::new(0, range2_start_byte),
            end_point: Point::new(0, range2_end_byte),
        },
    ]);

    let tree = parser.parse_str(source_code, None).unwrap();
    let root = tree.root_node();
    let statement1 = root.child(0).unwrap();
    let statement2 = root.child(1).unwrap();

    // Each included range yields one expression statement.
    assert_eq!(
        root.to_sexp(),
        concat!(
            "(program",
            " (expression_statement (call_expression (identifier) (arguments)))",
            " (expression_statement (call_expression (identifier) (arguments))))"
        )
    );

    assert_eq!(statement1.start_byte(), source_code.find("b()").unwrap());
    assert_eq!(statement1.end_byte(), source_code.find(" %> c").unwrap());
    assert_eq!(statement2.start_byte(), source_code.find("d()").unwrap());
    assert_eq!(statement2.end_byte(), source_code.len() - " %>".len());
}
|
||||
|
||||
#[test]
fn test_parsing_with_a_newly_excluded_range() {
    let mut source_code = String::from("<div><span><%= something %></span></div>");

    // Parse HTML including the template directive, which will cause an error
    let mut parser = Parser::new();
    parser.set_language(get_language("html")).unwrap();
    let mut first_tree = parser.parse_str(&source_code, None).unwrap();

    // Insert code at the beginning of the document.
    let prefix = "a very very long line of plain text. ";
    first_tree.edit(&InputEdit {
        start_byte: 0,
        old_end_byte: 0,
        new_end_byte: prefix.len(),
        start_position: Point::new(0, 0),
        old_end_position: Point::new(0, 0),
        new_end_position: Point::new(0, prefix.len()),
    });
    source_code.insert_str(0, prefix);

    // Parse the HTML again, this time *excluding* the template directive
    // (which has moved since the previous parse).
    let directive_start = source_code.find("<%=").unwrap();
    let directive_end = source_code.find("</span>").unwrap();
    let source_code_end = source_code.len();
    parser.set_included_ranges(&[
        Range {
            start_byte: 0,
            end_byte: directive_start,
            start_point: Point::new(0, 0),
            end_point: Point::new(0, directive_start),
        },
        Range {
            start_byte: directive_end,
            end_byte: source_code_end,
            start_point: Point::new(0, directive_end),
            end_point: Point::new(0, source_code_end),
        },
    ]);
    let tree = parser.parse_str(&source_code, Some(&first_tree)).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(fragment (text) (element",
            " (start_tag (tag_name))",
            " (element (start_tag (tag_name)) (end_tag (tag_name)))",
            " (end_tag (tag_name))))"
        )
    );

    assert_eq!(
        tree.changed_ranges(&first_tree),
        vec![
            // The first range that has changed syntax is the range of the newly-inserted text.
            Range {
                start_byte: 0,
                end_byte: prefix.len(),
                start_point: Point::new(0, 0),
                end_point: Point::new(0, prefix.len()),
            },
            // Even though no edits were applied to the outer `div` element,
            // its contents have changed syntax because a range of text that
            // was previously included is now excluded.
            Range {
                start_byte: directive_start,
                end_byte: directive_end,
                start_point: Point::new(0, directive_start),
                end_point: Point::new(0, directive_end),
            },
        ]
    );
}
|
||||
|
||||
#[test]
fn test_parsing_with_a_newly_included_range() {
    let source_code = "<div><%= foo() %></div><div><%= bar() %>";
    let first_code_start_index = source_code.find(" foo").unwrap();
    let first_code_end_index = first_code_start_index + 7;
    let second_code_start_index = source_code.find(" bar").unwrap();
    let second_code_end_index = second_code_start_index + 7;
    let ranges = [
        Range {
            start_byte: first_code_start_index,
            end_byte: first_code_end_index,
            start_point: Point::new(0, first_code_start_index),
            end_point: Point::new(0, first_code_end_index),
        },
        Range {
            start_byte: second_code_start_index,
            end_byte: second_code_end_index,
            start_point: Point::new(0, second_code_start_index),
            end_point: Point::new(0, second_code_end_index),
        },
    ];

    // Parse only the first code directive as JavaScript
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    parser.set_included_ranges(&ranges[0..1]);
    let first_tree = parser.parse_str(source_code, None).unwrap();
    assert_eq!(
        first_tree.root_node().to_sexp(),
        concat!(
            "(program",
            " (expression_statement (call_expression (identifier) (arguments))))",
        )
    );

    // Parse both the code directives as JavaScript, using the old tree as a reference.
    parser.set_included_ranges(&ranges);
    let tree = parser.parse_str(&source_code, Some(&first_tree)).unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(program",
            " (expression_statement (call_expression (identifier) (arguments)))",
            " (expression_statement (call_expression (identifier) (arguments))))",
        )
    );

    // Everything after the first directive (i.e. the newly-included second
    // directive) is reported as changed.
    assert_eq!(
        tree.changed_ranges(&first_tree),
        vec![Range {
            start_byte: first_code_end_index + 1,
            end_byte: second_code_end_index + 1,
            start_point: Point::new(0, first_code_end_index + 1),
            end_point: Point::new(0, second_code_end_index + 1),
        }]
    );
}
|
||||
|
||||
#[test]
fn test_parsing_with_included_ranges_and_missing_tokens() {
    let (parser_name, parser_code) = generate_parser_for_grammar(
        r#"{
            "name": "test_leading_missing_token",
            "rules": {
                "program": {
                    "type": "SEQ",
                    "members": [
                        {"type": "SYMBOL", "name": "A"},
                        {"type": "SYMBOL", "name": "b"},
                        {"type": "SYMBOL", "name": "c"},
                        {"type": "SYMBOL", "name": "A"},
                        {"type": "SYMBOL", "name": "b"},
                        {"type": "SYMBOL", "name": "c"}
                    ]
                },
                "A": {"type": "SYMBOL", "name": "a"},
                "a": {"type": "STRING", "value": "a"},
                "b": {"type": "STRING", "value": "b"},
                "c": {"type": "STRING", "value": "c"}
            }
        }"#,
    )
    .unwrap();

    let mut parser = Parser::new();
    parser
        .set_language(get_test_language(&parser_name, &parser_code, None))
        .unwrap();

    // There's a missing `a` token at the beginning of the code. It must be inserted
    // at the beginning of the first included range, not at {0, 0}.
    let source_code = "__bc__bc__";
    parser.set_included_ranges(&[
        Range {
            start_byte: 2,
            end_byte: 4,
            start_point: Point::new(0, 2),
            end_point: Point::new(0, 4),
        },
        Range {
            start_byte: 6,
            end_byte: 8,
            start_point: Point::new(0, 6),
            end_point: Point::new(0, 8),
        },
    ]);

    let tree = parser.parse_str(source_code, None).unwrap();
    let root = tree.root_node();
    assert_eq!(
        root.to_sexp(),
        "(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))"
    );
    assert_eq!(root.start_byte(), 2);
    assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
|
||||
134
cli/src/tests/properties_test.rs
Normal file
134
cli/src/tests/properties_test.rs
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
use super::helpers::fixtures::get_language;
|
||||
use crate::properties;
|
||||
use serde_derive::Deserialize;
|
||||
use tree_sitter::{Parser, PropertySheet};
|
||||
|
||||
#[derive(Debug, Default, Deserialize, PartialEq, Eq)]
|
||||
struct Properties {
|
||||
a: Option<String>,
|
||||
b: Option<String>,
|
||||
}
|
||||
|
||||
#[test]
fn test_walk_with_properties_with_nth_child() {
    let language = get_language("javascript");
    let property_sheet = PropertySheet::<Properties>::new(
        language,
        &properties::generate_property_sheet_string(
            "/some/path.css",
            "
            binary_expression > identifier:nth-child(2) {
              a: x;
            }

            binary_expression > identifier {
              a: y;
            }

            identifier {
              a: z;
            }
            ",
        )
        .unwrap(),
    )
    .unwrap();

    let source_code = "a = b || c;";

    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse_str(source_code, None).unwrap();

    let mut cursor = tree.walk_with_properties(&property_sheet, source_code);
    assert_eq!(cursor.node().kind(), "program");
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "expression_statement");
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "assignment_expression");

    // `a` on the left-hand side matches only the bare `identifier` rule.
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "identifier");
    assert_eq!(*cursor.node_properties(), Properties { a: Some("z".to_string()), b: None });

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "=");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "binary_expression");

    // `b` is the first child of the binary expression: the child selector wins.
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "identifier");
    assert_eq!(*cursor.node_properties(), Properties { a: Some("y".to_string()), b: None });

    // `c` is the second child: the `:nth-child(2)` selector wins.
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "||");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "identifier");
    assert_eq!(*cursor.node_properties(), Properties { a: Some("x".to_string()), b: None });
}
|
||||
|
||||
#[test]
fn test_walk_with_properties_with_regexes() {
    let language = get_language("javascript");
    let property_sheet = PropertySheet::<Properties>::new(
        language,
        &properties::generate_property_sheet_string(
            "/some/path.css",
            "
            identifier {
              &[text='^[A-Z]'] {
                a: y;
              }

              &[text='^[A-Z_]+$'] {
                a: z;
              }

              a: x;
            }
            ",
        )
        .unwrap(),
    )
    .unwrap();

    let source_code = "const ABC = Def(ghi);";

    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse_str(source_code, None).unwrap();

    let mut cursor = tree.walk_with_properties(&property_sheet, source_code);
    assert_eq!(cursor.node().kind(), "program");
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "lexical_declaration");
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "const");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "variable_declarator");

    // The later selector with a text regex overrides the earlier one.
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "identifier");
    assert_eq!(*cursor.node_properties(), Properties { a: Some("z".to_string()), b: None });

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "=");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "call_expression");

    // The selectors with text regexes override the selector without one.
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "identifier");
    assert_eq!(*cursor.node_properties(), Properties { a: Some("y".to_string()), b: None });

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "arguments");
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "(");

    // This node doesn't match either of the regexes.
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "identifier");
    assert_eq!(*cursor.node_properties(), Properties { a: Some("x".to_string()), b: None });
}
|
||||
379
cli/src/tests/tree_test.rs
Normal file
379
cli/src/tests/tree_test.rs
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
use super::helpers::edits::{invert_edit, perform_edit, Edit};
|
||||
use super::helpers::fixtures::get_language;
|
||||
use std::str;
|
||||
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
|
||||
|
||||
#[test]
fn test_tree_edit() {
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    let tree = parser.parse_str(" abc !== def", None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (binary_expression (identifier) (identifier))))"
    );

    // edit entirely within the tree's padding:
    // resize the padding of the tree and its leftmost descendants.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 1,
            new_end_byte: 2,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 1),
            new_end_position: Point::new(0, 2),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 3);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 3);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // edit starting in the tree's padding but extending into its content:
    // shrink the content to compenstate for the expanded padding.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 4,
            new_end_byte: 5,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 5),
            new_end_position: Point::new(0, 5),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 5);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 5);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // insertion at the edge of a tree's padding:
    // expand the tree's padding.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 4);
        assert_eq!(expr.end_byte(), 17);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 4);
        assert_eq!(child1.end_byte(), 7);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 9);
        assert_eq!(child2.end_byte(), 12);
    }

    // replacement starting at the edge of the tree's padding:
    // resize the content and not the padding.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 4);
        assert_eq!(expr.end_byte(), 17);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 4);
        assert_eq!(child1.end_byte(), 7);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 9);
        assert_eq!(child2.end_byte(), 12);
    }

    // deletion that spans more than one child node:
    // shrink subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 11,
            new_end_byte: 4,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 11),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 4);
        assert_eq!(expr.end_byte(), 8);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 4);
        assert_eq!(child1.end_byte(), 4);
        assert!(child2.has_changes());
        assert_eq!(child2.start_byte(), 4);
        assert_eq!(child2.end_byte(), 4);
        assert!(child3.has_changes());
        assert_eq!(child3.start_byte(), 5);
        assert_eq!(child3.end_byte(), 8);
    }

    // insertion at the end of the tree:
    // extend the tree's content.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 15,
            old_end_byte: 15,
            new_end_byte: 16,
            start_position: Point::new(0, 15),
            old_end_position: Point::new(0, 15),
            new_end_position: Point::new(0, 16),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 2);
        assert_eq!(expr.end_byte(), 16);
        assert!(!child1.has_changes());
        assert_eq!(child1.end_byte(), 5);
        assert!(!child2.has_changes());
        assert_eq!(child2.end_byte(), 10);
        assert!(child3.has_changes());
        assert_eq!(child3.end_byte(), 16);
    }
}
|
||||
|
||||
#[test]
fn test_tree_walk() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    let tree = parser
        .parse_str(
            "
                struct Stuff {
                    a: A;
                    b: Option<B>,
                }
            ",
            None,
        )
        .unwrap();

    // Walk down into the struct and across its first few children,
    // checking node kinds and named-ness along the way.
    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct_item");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct");
    assert_eq!(cursor.node().is_named(), false);

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "type_identifier");
    assert_eq!(cursor.node().is_named(), true);

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration_list");
    assert_eq!(cursor.node().is_named(), true);
}
|
||||
|
||||
#[test]
fn test_tree_node_equality() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();
    let tree = parser.parse_str("struct A {}", None).unwrap();

    // Two handles to the same underlying node compare equal; distinct
    // nodes within the same tree do not.
    let node1 = tree.root_node();
    let node2 = tree.root_node();
    assert_eq!(node1, node2);
    assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap());
    assert_ne!(node1.child(0).unwrap(), node2);
}
|
||||
|
||||
#[test]
|
||||
fn test_get_changed_ranges() {
|
||||
let source_code = b"{a: null};\n".to_vec();
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("javascript")).unwrap();
|
||||
let tree = parser
|
||||
.parse_utf8(&mut |i, _| &source_code[i..], None)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(program (expression_statement (object (pair (property_identifier) (null)))))"
|
||||
);
|
||||
|
||||
// Updating one token
|
||||
{
|
||||
let mut tree = tree.clone();
|
||||
let mut source_code = source_code.clone();
|
||||
|
||||
// Replace `null` with `nothing` - that token has changed syntax
|
||||
let edit = Edit {
|
||||
position: index_of(&source_code, "ull"),
|
||||
deleted_length: 3,
|
||||
inserted_text: b"othing".to_vec(),
|
||||
};
|
||||
let inverse_edit = invert_edit(&source_code, &edit);
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit);
|
||||
assert_eq!(ranges, vec![range_of(&source_code, "nothing")]);
|
||||
|
||||
// Replace `nothing` with `null` - that token has changed syntax
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit);
|
||||
assert_eq!(ranges, vec![range_of(&source_code, "null")]);
|
||||
}
|
||||
|
||||
// Changing only leading whitespace
|
||||
{
|
||||
let mut tree = tree.clone();
|
||||
let mut source_code = source_code.clone();
|
||||
|
||||
// Insert leading newline - no changed ranges
|
||||
let edit = Edit {
|
||||
position: 0,
|
||||
deleted_length: 0,
|
||||
inserted_text: b"\n".to_vec(),
|
||||
};
|
||||
let inverse_edit = invert_edit(&source_code, &edit);
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit);
|
||||
assert_eq!(ranges, vec![]);
|
||||
|
||||
// Remove leading newline - no changed ranges
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit);
|
||||
assert_eq!(ranges, vec![]);
|
||||
}
|
||||
|
||||
// Inserting elements
|
||||
{
|
||||
let mut tree = tree.clone();
|
||||
let mut source_code = source_code.clone();
|
||||
|
||||
// Insert a key-value pair before the `}` - those tokens are changed
|
||||
let edit1 = Edit {
|
||||
position: index_of(&source_code, "}"),
|
||||
deleted_length: 0,
|
||||
inserted_text: b", b: false".to_vec(),
|
||||
};
|
||||
let inverse_edit1 = invert_edit(&source_code, &edit1);
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1);
|
||||
assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]);
|
||||
|
||||
let edit2 = Edit {
|
||||
position: index_of(&source_code, ", b"),
|
||||
deleted_length: 0,
|
||||
inserted_text: b", c: 1".to_vec(),
|
||||
};
|
||||
let inverse_edit2 = invert_edit(&source_code, &edit2);
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit2);
|
||||
assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]);
|
||||
|
||||
// Remove the middle pair
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit2);
|
||||
assert_eq!(ranges, vec![]);
|
||||
|
||||
// Remove the second pair
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1);
|
||||
assert_eq!(ranges, vec![]);
|
||||
}
|
||||
|
||||
// Wrapping elements in larger expressions
|
||||
{
|
||||
let mut tree = tree.clone();
|
||||
let mut source_code = source_code.clone();
|
||||
|
||||
// Replace `null` with the binary expression `b === null`
|
||||
let edit1 = Edit {
|
||||
position: index_of(&source_code, "null"),
|
||||
deleted_length: 0,
|
||||
inserted_text: b"b === ".to_vec(),
|
||||
};
|
||||
let inverse_edit1 = invert_edit(&source_code, &edit1);
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1);
|
||||
assert_eq!(ranges, vec![range_of(&source_code, "b === null")]);
|
||||
|
||||
// Undo
|
||||
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1);
|
||||
assert_eq!(ranges, vec![range_of(&source_code, "null")]);
|
||||
}
|
||||
}
|
||||
|
||||
fn index_of(text: &Vec<u8>, substring: &str) -> usize {
|
||||
str::from_utf8(text.as_slice())
|
||||
.unwrap()
|
||||
.find(substring)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn range_of(text: &Vec<u8>, substring: &str) -> Range {
|
||||
let start_byte = index_of(text, substring);
|
||||
let end_byte = start_byte + substring.as_bytes().len();
|
||||
Range {
|
||||
start_byte,
|
||||
end_byte,
|
||||
start_point: Point::new(0, start_byte),
|
||||
end_point: Point::new(0, end_byte),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_changed_ranges(
|
||||
parser: &mut Parser,
|
||||
tree: &mut Tree,
|
||||
source_code: &mut Vec<u8>,
|
||||
edit: Edit,
|
||||
) -> Vec<Range> {
|
||||
perform_edit(tree, source_code, &edit);
|
||||
let new_tree = parser
|
||||
.parse_utf8(&mut |i, _| &source_code[i..], Some(tree))
|
||||
.unwrap();
|
||||
let result = tree.changed_ranges(&new_tree);
|
||||
*tree = new_tree;
|
||||
result
|
||||
}
|
||||
66
cli/src/util.rs
Normal file
66
cli/src/util.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
#[cfg(unix)]
|
||||
use std::path::PathBuf;
|
||||
#[cfg(unix)]
|
||||
use std::process::{Child, ChildStdin, Command, Stdio};
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[cfg(unix)]
|
||||
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
|
||||
|
||||
#[cfg(windows)]
|
||||
pub struct LogSession();
|
||||
|
||||
#[cfg(unix)]
|
||||
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
|
||||
Ok(LogSession())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
|
||||
use std::io::Write;
|
||||
|
||||
let mut dot_file = std::fs::File::create(path)?;
|
||||
dot_file.write(HTML_HEADER)?;
|
||||
let mut dot_process = Command::new("dot")
|
||||
.arg("-Tsvg")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(dot_file)
|
||||
.spawn()
|
||||
.expect("Failed to run Dot");
|
||||
let dot_stdin = dot_process
|
||||
.stdin
|
||||
.take()
|
||||
.expect("Failed to open stdin for Dot");
|
||||
parser.print_dot_graphs(&dot_stdin);
|
||||
Ok(LogSession(
|
||||
PathBuf::from(path),
|
||||
Some(dot_process),
|
||||
Some(dot_stdin),
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
impl Drop for LogSession {
|
||||
fn drop(&mut self) {
|
||||
use std::fs;
|
||||
|
||||
drop(self.2.take().unwrap());
|
||||
let output = self.1.take().unwrap().wait_with_output().unwrap();
|
||||
if output.status.success() {
|
||||
if cfg!(target_os = "macos")
|
||||
&& fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64
|
||||
{
|
||||
Command::new("open").arg(&self.0).output().unwrap();
|
||||
}
|
||||
} else {
|
||||
eprintln!(
|
||||
"Dot failed: {} {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
1
externals/bandit
vendored
1
externals/bandit
vendored
|
|
@ -1 +0,0 @@
|
|||
Subproject commit bfdb8a3322a2e54b11aea64d84f9788d83477e83
|
||||
1
externals/crypto-algorithms
vendored
1
externals/crypto-algorithms
vendored
|
|
@ -1 +0,0 @@
|
|||
Subproject commit c7e5c23ab04ecfb5465cbefbe17ba23d4cb3bc9d
|
||||
1
externals/gyp
vendored
1
externals/gyp
vendored
|
|
@ -1 +0,0 @@
|
|||
Subproject commit e0ee72ddc7fb97eb33d530cf684efcbe4d27ecb3
|
||||
1
externals/json-parser
vendored
1
externals/json-parser
vendored
|
|
@ -1 +0,0 @@
|
|||
Subproject commit 70533215eea575e40a0b91a34ae01a779641d73a
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
#ifndef TREE_SITTER_COMPILER_H_
|
||||
#define TREE_SITTER_COMPILER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
typedef enum {
|
||||
TSCompileErrorTypeNone,
|
||||
TSCompileErrorTypeInvalidGrammar,
|
||||
TSCompileErrorTypeInvalidRegex,
|
||||
TSCompileErrorTypeUndefinedSymbol,
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
TSCompileErrorTypeInvalidExternalToken,
|
||||
TSCompileErrorTypeLexConflict,
|
||||
TSCompileErrorTypeParseConflict,
|
||||
TSCompileErrorTypeEpsilonRule,
|
||||
TSCompileErrorTypeInvalidTokenContents,
|
||||
TSCompileErrorTypeInvalidRuleName,
|
||||
TSCompileErrorTypeInvalidWordRule,
|
||||
} TSCompileErrorType;
|
||||
|
||||
typedef struct {
|
||||
char *code;
|
||||
char *error_message;
|
||||
TSCompileErrorType error_type;
|
||||
} TSCompileResult;
|
||||
|
||||
TSCompileResult ts_compile_grammar(const char *input, FILE *log_file);
|
||||
TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_COMPILER_H_
|
||||
32
lib/Cargo.toml
Normal file
32
lib/Cargo.toml
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
[package]
|
||||
name = "tree-sitter"
|
||||
description = "Rust bindings to the Tree-sitter parsing library"
|
||||
version = "0.3.5"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
keywords = ["incremental", "parsing"]
|
||||
categories = ["api-bindings", "parsing", "text-editors"]
|
||||
|
||||
include = [
|
||||
"/build.rs",
|
||||
"/Cargo.toml",
|
||||
"/LICENSE",
|
||||
"/README.md",
|
||||
"/src/*",
|
||||
"/core/tree-sitter/externals/utf8proc/utf8proc*",
|
||||
"/core/tree-sitter/include/*",
|
||||
"/core/tree-sitter/src/runtime/*",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
regex = "1"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
serde_derive = "1.0"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
|
||||
[lib]
|
||||
path = "binding/lib.rs"
|
||||
98
lib/README.md
Normal file
98
lib/README.md
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
Rust Tree-sitter
|
||||
===========================
|
||||
|
||||
[](https://travis-ci.org/tree-sitter/rust-tree-sitter)
|
||||
[](https://ci.appveyor.com/project/maxbrunsfeld/rust-tree-sitter/branch/master)
|
||||
[](https://crates.io/crates/tree-sitter)
|
||||
|
||||
Rust bindings to the [Tree-sitter][] parsing library.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
First, create a parser:
|
||||
|
||||
```rust
|
||||
use tree_sitter::{Parser, Language};
|
||||
|
||||
// ...
|
||||
|
||||
let mut parser = Parser::new();
|
||||
```
|
||||
|
||||
Then assign a language to the parser. Tree-sitter languages consist of generated C code. To use them from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`:
|
||||
|
||||
```rust
|
||||
extern "C" { fn tree_sitter_c() -> Language; }
|
||||
extern "C" { fn tree_sitter_rust() -> Language; }
|
||||
extern "C" { fn tree_sitter_javascript() -> Language; }
|
||||
|
||||
let language = unsafe { tree_sitter_rust() };
|
||||
parser.set_language(language).unwrap();
|
||||
```
|
||||
|
||||
Now you can parse source code:
|
||||
|
||||
```rust
|
||||
let source_code = "fn test() {}";
|
||||
let tree = parser.parse_str(source_code, None);
|
||||
let root_node = tree.root_node();
|
||||
|
||||
assert_eq!(root_node.kind(), "source_file");
|
||||
assert_eq!(root_node.start_position().column, 0);
|
||||
assert_eq!(root_node.end_position().column, 12);
|
||||
```
|
||||
|
||||
### Editing
|
||||
|
||||
Once you have a syntax tree, you can update it when your source code changes. Passing in the previous edited tree makes `parse` run much more quickly:
|
||||
|
||||
```rust
|
||||
let new_source_code = "fn test(a: u32) {}"
|
||||
|
||||
tree.edit(InputEdit {
|
||||
start_byte: 8,
|
||||
old_end_byte: 8,
|
||||
new_end_byte: 14,
|
||||
start_position: Point::new(0, 8),
|
||||
old_end_position: Point::new(0, 8),
|
||||
new_end_position: Point::new(0, 14),
|
||||
});
|
||||
|
||||
let new_tree = parser.parse_str(new_source_code, Some(&tree));
|
||||
```
|
||||
|
||||
### Text Input
|
||||
|
||||
The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF8 or UTF16:
|
||||
|
||||
```rust
|
||||
// Store some source code in an array of lines.
|
||||
let lines = &[
|
||||
"pub fn foo() {",
|
||||
" 1",
|
||||
"}",
|
||||
];
|
||||
|
||||
// Parse the source code using a custom callback. The callback is called
|
||||
// with both a byte offset and a row/column offset.
|
||||
let tree = parser.parse_utf8(&mut |_byte: u32, position: Point| -> &[u8] {
|
||||
let row = position.row as usize;
|
||||
let column = position.column as usize;
|
||||
if row < lines.len() {
|
||||
if column < lines[row].as_bytes().len() {
|
||||
&lines[row].as_bytes()[column..]
|
||||
} else {
|
||||
"\n".as_bytes()
|
||||
}
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}, None).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
"(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"
|
||||
);
|
||||
```
|
||||
|
||||
[tree-sitter]: https://github.com/tree-sitter/tree-sitter
|
||||
323
lib/binding/bindings.rs
Normal file
323
lib/binding/bindings.rs
Normal file
|
|
@ -0,0 +1,323 @@
|
|||
/* automatically generated by rust-bindgen */
|
||||
|
||||
pub type __darwin_size_t = ::std::os::raw::c_ulong;
|
||||
pub type FILE = [u64; 19usize];
|
||||
pub type TSSymbol = u16;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSLanguage {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSParser {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSTree {
|
||||
_unused: [u8; 0],
|
||||
}
|
||||
pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0;
|
||||
pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1;
|
||||
pub type TSInputEncoding = u32;
|
||||
pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0;
|
||||
pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1;
|
||||
pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2;
|
||||
pub type TSSymbolType = u32;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSPoint {
|
||||
pub row: u32,
|
||||
pub column: u32,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSRange {
|
||||
pub start_point: TSPoint,
|
||||
pub end_point: TSPoint,
|
||||
pub start_byte: u32,
|
||||
pub end_byte: u32,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSInput {
|
||||
pub payload: *mut ::std::os::raw::c_void,
|
||||
pub read: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
payload: *mut ::std::os::raw::c_void,
|
||||
byte_index: u32,
|
||||
position: TSPoint,
|
||||
bytes_read: *mut u32,
|
||||
) -> *const ::std::os::raw::c_char,
|
||||
>,
|
||||
pub encoding: TSInputEncoding,
|
||||
}
|
||||
pub const TSLogType_TSLogTypeParse: TSLogType = 0;
|
||||
pub const TSLogType_TSLogTypeLex: TSLogType = 1;
|
||||
pub type TSLogType = u32;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSLogger {
|
||||
pub payload: *mut ::std::os::raw::c_void,
|
||||
pub log: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
payload: *mut ::std::os::raw::c_void,
|
||||
arg1: TSLogType,
|
||||
arg2: *const ::std::os::raw::c_char,
|
||||
),
|
||||
>,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSInputEdit {
|
||||
pub start_byte: u32,
|
||||
pub old_end_byte: u32,
|
||||
pub new_end_byte: u32,
|
||||
pub start_point: TSPoint,
|
||||
pub old_end_point: TSPoint,
|
||||
pub new_end_point: TSPoint,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSNode {
|
||||
pub context: [u32; 4usize],
|
||||
pub id: *const ::std::os::raw::c_void,
|
||||
pub tree: *const TSTree,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSTreeCursor {
|
||||
pub tree: *const ::std::os::raw::c_void,
|
||||
pub id: *const ::std::os::raw::c_void,
|
||||
pub context: [u32; 2usize],
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_new() -> *mut TSParser;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_delete(arg1: *mut TSParser);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: ::std::os::raw::c_int);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_parse_string(
|
||||
arg1: *mut TSParser,
|
||||
arg2: *const TSTree,
|
||||
arg3: *const ::std::os::raw::c_char,
|
||||
arg4: u32,
|
||||
) -> *mut TSTree;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_parse_string_encoding(
|
||||
arg1: *mut TSParser,
|
||||
arg2: *const TSTree,
|
||||
arg3: *const ::std::os::raw::c_char,
|
||||
arg4: u32,
|
||||
arg5: TSInputEncoding,
|
||||
) -> *mut TSTree;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_enabled(arg1: *const TSParser) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_reset(arg1: *mut TSParser);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_set_included_ranges(arg1: *mut TSParser, arg2: *const TSRange, arg3: u32);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_parser_included_ranges(arg1: *const TSParser, arg2: *mut u32) -> *const TSRange;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_delete(arg1: *mut TSTree);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_get_changed_ranges(
|
||||
arg1: *const TSTree,
|
||||
arg2: *const TSTree,
|
||||
arg3: *mut u32,
|
||||
) -> *mut TSRange;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_start_byte(arg1: TSNode) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_start_point(arg1: TSNode) -> TSPoint;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_end_byte(arg1: TSNode) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_end_point(arg1: TSNode) -> TSPoint;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_is_null(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_is_named(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_is_missing(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_has_changes(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_has_error(arg1: TSNode) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_parent(arg1: TSNode) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_child_count(arg1: TSNode) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_named_child_count(arg1: TSNode) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint)
|
||||
-> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_named_descendant_for_point_range(
|
||||
arg1: TSNode,
|
||||
arg2: TSPoint,
|
||||
arg3: TSPoint,
|
||||
) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_symbol_name(
|
||||
arg1: *const TSLanguage,
|
||||
arg2: TSSymbol,
|
||||
) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_symbol_for_name(
|
||||
arg1: *const TSLanguage,
|
||||
arg2: *const ::std::os::raw::c_char,
|
||||
) -> TSSymbol;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
|
||||
}
|
||||
|
||||
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9;
|
||||
9
lib/binding/ffi.rs
Normal file
9
lib/binding/ffi.rs
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
#![allow(dead_code)]
|
||||
#![allow(non_upper_case_globals)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
include!("./bindings.rs");
|
||||
|
||||
extern "C" {
|
||||
pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int;
|
||||
}
|
||||
17
lib/binding/helper.c
Normal file
17
lib/binding/helper.c
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
#if defined(TREE_SITTER_TEST)
|
||||
|
||||
void ts_record_free(void *);
|
||||
|
||||
void rust_tree_sitter_free(void *p) {
|
||||
ts_record_free(p);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void free(void *);
|
||||
|
||||
void rust_tree_sitter_free(void *p) {
|
||||
free(p);
|
||||
}
|
||||
|
||||
#endif
|
||||
922
lib/binding/lib.rs
Normal file
922
lib/binding/lib.rs
Normal file
|
|
@ -0,0 +1,922 @@
|
|||
mod ffi;
|
||||
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
extern crate regex;
|
||||
extern crate serde;
|
||||
extern crate serde_json;
|
||||
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::io::AsRawFd;
|
||||
|
||||
use regex::Regex;
|
||||
use serde::de::DeserializeOwned;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CStr;
|
||||
use std::fmt;
|
||||
use std::io::{self, Read, Seek};
|
||||
use std::marker::PhantomData;
|
||||
use std::os::raw::{c_char, c_void};
|
||||
use std::ptr;
|
||||
use std::slice;
|
||||
use std::str;
|
||||
use std::u16;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[repr(transparent)]
|
||||
pub struct Language(*const ffi::TSLanguage);
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum LogType {
|
||||
Parse,
|
||||
Lex,
|
||||
}
|
||||
|
||||
type Logger<'a> = Box<FnMut(LogType, &str) + 'a>;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Point {
|
||||
pub row: usize,
|
||||
pub column: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Range {
|
||||
pub start_byte: usize,
|
||||
pub end_byte: usize,
|
||||
pub start_point: Point,
|
||||
pub end_point: Point,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct InputEdit {
|
||||
pub start_byte: usize,
|
||||
pub old_end_byte: usize,
|
||||
pub new_end_byte: usize,
|
||||
pub start_position: Point,
|
||||
pub old_end_position: Point,
|
||||
pub new_end_position: Point,
|
||||
}
|
||||
|
||||
struct PropertyTransition {
|
||||
state_id: usize,
|
||||
child_index: Option<usize>,
|
||||
text_regex_index: Option<usize>,
|
||||
}
|
||||
|
||||
struct PropertyState {
|
||||
transitions: HashMap<u16, Vec<PropertyTransition>>,
|
||||
property_set_id: usize,
|
||||
default_next_state_id: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum PropertySheetError {
|
||||
InvalidJSON(serde_json::Error),
|
||||
InvalidRegex(regex::Error),
|
||||
}
|
||||
|
||||
pub struct PropertySheet<P = HashMap<String, String>> {
|
||||
states: Vec<PropertyState>,
|
||||
property_sets: Vec<P>,
|
||||
text_regexes: Vec<Regex>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Hash, PartialEq, Eq)]
|
||||
pub struct PropertyTransitionJSON {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub index: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub text: Option<String>,
|
||||
pub state_id: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct PropertyStateJSON {
|
||||
pub id: Option<usize>,
|
||||
pub property_set_id: usize,
|
||||
pub transitions: Vec<PropertyTransitionJSON>,
|
||||
pub default_next_state_id: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct PropertySheetJSON<P> {
|
||||
pub states: Vec<PropertyStateJSON>,
|
||||
pub property_sets: Vec<P>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>);
|
||||
|
||||
pub struct Parser(*mut ffi::TSParser);
|
||||
|
||||
pub struct Tree(*mut ffi::TSTree);
|
||||
|
||||
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
|
||||
|
||||
pub struct TreePropertyCursor<'a, P> {
|
||||
cursor: TreeCursor<'a>,
|
||||
state_stack: Vec<usize>,
|
||||
child_index_stack: Vec<usize>,
|
||||
property_sheet: &'a PropertySheet<P>,
|
||||
source: &'a str,
|
||||
}
|
||||
|
||||
impl Language {
|
||||
pub fn node_kind_count(&self) -> usize {
|
||||
unsafe { ffi::ts_language_symbol_count(self.0) as usize }
|
||||
}
|
||||
|
||||
pub fn node_kind_for_id(&self, id: u16) -> &'static str {
|
||||
unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) }
|
||||
.to_str()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn node_kind_is_named(&self, id: u16) -> bool {
|
||||
unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Send for Language {}
|
||||
|
||||
unsafe impl Sync for Language {}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Parser {
|
||||
unsafe {
|
||||
let parser = ffi::ts_parser_new();
|
||||
Parser(parser)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_language(&mut self, language: Language) -> Result<(), String> {
|
||||
unsafe {
|
||||
let version = ffi::ts_language_version(language.0) as usize;
|
||||
if version == ffi::TREE_SITTER_LANGUAGE_VERSION {
|
||||
ffi::ts_parser_set_language(self.0, language.0);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format!(
|
||||
"Incompatible language version {}. Expected {}.",
|
||||
version,
|
||||
ffi::TREE_SITTER_LANGUAGE_VERSION
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn logger(&self) -> Option<&Logger> {
|
||||
let logger = unsafe { ffi::ts_parser_logger(self.0) };
|
||||
unsafe { (logger.payload as *mut Logger).as_ref() }
|
||||
}
|
||||
|
||||
pub fn set_logger(&mut self, logger: Option<Logger>) {
|
||||
let prev_logger = unsafe { ffi::ts_parser_logger(self.0) };
|
||||
if !prev_logger.payload.is_null() {
|
||||
unsafe { Box::from_raw(prev_logger.payload as *mut Logger) };
|
||||
}
|
||||
|
||||
let c_logger;
|
||||
if let Some(logger) = logger {
|
||||
let container = Box::new(logger);
|
||||
|
||||
unsafe extern "C" fn log(
|
||||
payload: *mut c_void,
|
||||
c_log_type: ffi::TSLogType,
|
||||
c_message: *const c_char,
|
||||
) {
|
||||
let callback = (payload as *mut Logger).as_mut().unwrap();
|
||||
if let Ok(message) = CStr::from_ptr(c_message).to_str() {
|
||||
let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse {
|
||||
LogType::Parse
|
||||
} else {
|
||||
LogType::Lex
|
||||
};
|
||||
callback(log_type, message);
|
||||
}
|
||||
};
|
||||
|
||||
let raw_container = Box::into_raw(container);
|
||||
|
||||
c_logger = ffi::TSLogger {
|
||||
payload: raw_container as *mut c_void,
|
||||
log: Some(log),
|
||||
};
|
||||
} else {
|
||||
c_logger = ffi::TSLogger {
|
||||
payload: ptr::null_mut(),
|
||||
log: None,
|
||||
};
|
||||
}
|
||||
|
||||
unsafe { ffi::ts_parser_set_logger(self.0, c_logger) };
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn print_dot_graphs(&mut self, file: &impl AsRawFd) {
|
||||
let fd = file.as_raw_fd();
|
||||
unsafe { ffi::ts_parser_print_dot_graphs(self.0, ffi::dup(fd)) }
|
||||
}
|
||||
|
||||
pub fn stop_printing_dot_graphs(&mut self) {
|
||||
unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) }
|
||||
}
|
||||
|
||||
pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option<Tree> {
|
||||
let bytes = input.as_bytes();
|
||||
self.parse_utf8(
|
||||
&mut |offset, _| {
|
||||
if offset < bytes.len() {
|
||||
&bytes[offset..]
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
},
|
||||
old_tree,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> Option<Tree> {
|
||||
self.parse_utf8_ptr(
|
||||
&mut |byte, position| {
|
||||
let slice = input(byte, position);
|
||||
(slice.as_ptr(), slice.len())
|
||||
},
|
||||
old_tree,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_utf16<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> Option<Tree> {
|
||||
self.parse_utf16_ptr(
|
||||
&mut |byte, position| {
|
||||
let slice = input(byte / 2, position);
|
||||
(slice.as_ptr(), slice.len())
|
||||
},
|
||||
old_tree,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_utf8_io(
|
||||
&mut self,
|
||||
mut input: impl Read + Seek,
|
||||
old_tree: Option<&Tree>,
|
||||
) -> io::Result<Option<Tree>> {
|
||||
let mut error = None;
|
||||
let mut current_offset = 0;
|
||||
let mut buffer = [0; 10 * 1024];
|
||||
let result = self.parse_utf8_ptr(
|
||||
&mut |byte, _| {
|
||||
if byte as u64 != current_offset {
|
||||
current_offset = byte as u64;
|
||||
if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) {
|
||||
error = Some(e);
|
||||
return (ptr::null(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
match input.read(&mut buffer) {
|
||||
Err(e) => {
|
||||
error = Some(e);
|
||||
(ptr::null(), 0)
|
||||
}
|
||||
Ok(length) => (buffer.as_ptr(), length),
|
||||
}
|
||||
},
|
||||
old_tree,
|
||||
);
|
||||
|
||||
match error {
|
||||
Some(e) => Err(e),
|
||||
None => Ok(result),
|
||||
}
|
||||
}
|
||||
|
||||
/// Abandons any in-progress parse so the next parse starts from scratch.
pub fn reset(&mut self) {
    unsafe { ffi::ts_parser_reset(self.0) }
}
|
||||
|
||||
/// Limits how much work the parser will do before halting.
// NOTE(review): the unit of `limit` (operations vs. microseconds) is defined
// by the C library — confirm against ts_parser_set_operation_limit docs.
pub fn set_operation_limit(&mut self, limit: usize) {
    unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
}
|
||||
|
||||
pub fn set_included_ranges(&mut self, ranges: &[Range]) {
|
||||
let ts_ranges: Vec<ffi::TSRange> =
|
||||
ranges.iter().cloned().map(|range| range.into()).collect();
|
||||
unsafe {
|
||||
ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32)
|
||||
};
|
||||
}
|
||||
|
||||
/// Low-level UTF-8 parse entry point: bridges a Rust closure to the C
/// `TSInput` callback interface.
///
/// The closure is passed as an opaque `payload` pointer and invoked from C
/// through the monomorphized `read::<T>` trampoline. The returned chunk
/// pointer must stay valid until the next callback invocation (guaranteed
/// here by the `'a` lifetime on the chunks the public wrappers hand out).
fn parse_utf8_ptr<T: FnMut(usize, Point) -> (*const u8, usize)>(
    &mut self,
    input: &mut T,
    old_tree: Option<&Tree>,
) -> Option<Tree> {
    // C-ABI trampoline: recovers the closure from `payload`, calls it, and
    // reports the chunk length through the `bytes_read` out-parameter.
    unsafe extern "C" fn read<T: FnMut(usize, Point) -> (*const u8, usize)>(
        payload: *mut c_void,
        byte_offset: u32,
        position: ffi::TSPoint,
        bytes_read: *mut u32,
    ) -> *const c_char {
        let input = (payload as *mut T).as_mut().unwrap();
        let (ptr, length) = (*input)(byte_offset as usize, position.into());
        *bytes_read = length as u32;
        return ptr as *const c_char;
    };

    let c_input = ffi::TSInput {
        // The closure outlives the `ts_parser_parse` call below, so the raw
        // payload pointer remains valid for the whole parse.
        payload: input as *mut T as *mut c_void,
        read: Some(read::<T>),
        encoding: ffi::TSInputEncoding_TSInputEncodingUTF8,
    };

    let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0);
    let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) };
    // A null tree means the parse was halted (e.g. no language set or the
    // operation limit was reached).
    if c_new_tree.is_null() {
        None
    } else {
        Some(Tree(c_new_tree))
    }
}
|
||||
|
||||
/// Low-level UTF-16 parse entry point: bridges a Rust closure to the C
/// `TSInput` callback interface.
///
/// The C library measures UTF-16 text in *bytes*, while the Rust closure
/// deals in `u16` code units, so columns are halved on the way in and chunk
/// lengths are doubled on the way out.
fn parse_utf16_ptr<T: FnMut(usize, Point) -> (*const u16, usize)>(
    &mut self,
    input: &mut T,
    old_tree: Option<&Tree>,
) -> Option<Tree> {
    // C-ABI trampoline: recovers the closure from `payload`, converts the
    // byte-based position to code-unit columns, and reports the chunk size
    // in bytes through `bytes_read`.
    unsafe extern "C" fn read<T: FnMut(usize, Point) -> (*const u16, usize)>(
        payload: *mut c_void,
        byte_offset: u32,
        position: ffi::TSPoint,
        bytes_read: *mut u32,
    ) -> *const c_char {
        let input = (payload as *mut T).as_mut().unwrap();
        let (ptr, length) = (*input)(
            byte_offset as usize,
            Point {
                row: position.row as usize,
                // C reports the column in bytes; each code unit is 2 bytes.
                column: position.column as usize / 2,
            },
        );
        // Chunk length is in u16 code units; C expects bytes.
        *bytes_read = length as u32 * 2;
        ptr as *const c_char
    };

    let c_input = ffi::TSInput {
        // The closure outlives the `ts_parser_parse` call below, so the raw
        // payload pointer remains valid for the whole parse.
        payload: input as *mut T as *mut c_void,
        read: Some(read::<T>),
        encoding: ffi::TSInputEncoding_TSInputEncodingUTF16,
    };

    let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0);
    let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) };
    // A null tree means the parse was halted before producing a result.
    if c_new_tree.is_null() {
        None
    } else {
        Some(Tree(c_new_tree))
    }
}
|
||||
}
|
||||
|
||||
impl Drop for Parser {
    fn drop(&mut self) {
        // Detach the dot-graph file and the logger before deleting the C
        // parser, so the C side releases its references (closing the duped
        // graph fd and dropping the boxed logger payload) while `self.0` is
        // still valid.
        self.stop_printing_dot_graphs();
        self.set_logger(None);
        unsafe { ffi::ts_parser_delete(self.0) }
    }
}
|
||||
|
||||
// SAFETY(review): Parser wraps an owned TSParser pointer with no visible
// sharing between instances, so moving it across threads looks sound —
// confirm the C library attaches no thread-local state to a parser.
unsafe impl Send for Parser {}
|
||||
|
||||
impl Tree {
    /// Returns the root node of the syntax tree.
    ///
    /// Unwrap is expected to succeed: the C library returns a non-null node
    /// id for the root of a valid tree.
    pub fn root_node(&self) -> Node {
        Node::new(unsafe { ffi::ts_tree_root_node(self.0) }).unwrap()
    }

    /// Records an edit to the underlying source text so the tree can be
    /// reused for an incremental re-parse.
    pub fn edit(&mut self, edit: &InputEdit) {
        // Convert to the C struct before crossing the FFI boundary.
        let edit = edit.into();
        unsafe { ffi::ts_tree_edit(self.0, &edit) };
    }

    /// Returns a cursor positioned at the root node.
    pub fn walk(&self) -> TreeCursor {
        self.root_node().walk()
    }

    /// Returns a cursor that also tracks property-sheet state while walking.
    pub fn walk_with_properties<'a, P>(
        &'a self,
        property_sheet: &'a PropertySheet<P>,
        source: &'a str,
    ) -> TreePropertyCursor<'a, P> {
        TreePropertyCursor::new(self, property_sheet, source)
    }

    /// Returns the ranges in which this tree's structure differs from
    /// `other` (typically the tree from before an edit).
    pub fn changed_ranges(&self, other: &Tree) -> Vec<Range> {
        unsafe {
            // The C function writes the range count through the out-pointer
            // and returns a heap-allocated array that we must free.
            // NOTE(review): `count` is inferred as usize but written through
            // a *mut u32 — this relies on little-endian layout; confirm.
            let mut count = 0;
            let ptr =
                ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32);
            let ranges = slice::from_raw_parts(ptr, count);
            let result = ranges.into_iter().map(|r| r.clone().into()).collect();
            // Release the C-allocated array now that the ranges are copied.
            free_ptr(ptr as *mut c_void);
            result
        }
    }
}
|
||||
|
||||
// SAFETY(review): Tree owns its TSTree pointer; moving it between threads
// looks sound given the C library's reference counting — TODO confirm.
unsafe impl Send for Tree {}
|
||||
|
||||
impl fmt::Debug for Tree {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "{{Tree {:?}}}", self.root_node())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Tree {
    fn drop(&mut self) {
        // Releases this handle's reference to the underlying C tree.
        unsafe { ffi::ts_tree_delete(self.0) }
    }
}
|
||||
|
||||
impl Clone for Tree {
    fn clone(&self) -> Tree {
        // ts_tree_copy produces an independent handle; both clones must be
        // deleted (see Drop) exactly once each.
        unsafe { Tree(ffi::ts_tree_copy(self.0)) }
    }
}
|
||||
|
||||
impl<'tree> Node<'tree> {
    /// Wraps a raw C node, returning None for the library's null-node
    /// sentinel (id pointer is null).
    fn new(node: ffi::TSNode) -> Option<Self> {
        if node.id.is_null() {
            None
        } else {
            Some(Node(node, PhantomData))
        }
    }

    /// Returns the numeric id of this node's type (grammar symbol).
    pub fn kind_id(&self) -> u16 {
        unsafe { ffi::ts_node_symbol(self.0) }
    }

    /// Returns this node's type name. The string is owned by the language's
    /// static data, hence the 'static lifetime.
    pub fn kind(&self) -> &'static str {
        unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
            .to_str()
            .unwrap()
    }

    /// True if this node is a named (non-anonymous) grammar rule.
    pub fn is_named(&self) -> bool {
        unsafe { ffi::ts_node_is_named(self.0) }
    }

    /// True if this node or any descendant was affected by an edit.
    pub fn has_changes(&self) -> bool {
        unsafe { ffi::ts_node_has_changes(self.0) }
    }

    /// True if this node or any descendant is an error node.
    pub fn has_error(&self) -> bool {
        unsafe { ffi::ts_node_has_error(self.0) }
    }

    /// True if this node itself is an error node (the reserved max symbol).
    pub fn is_error(&self) -> bool {
        self.kind_id() == u16::MAX
    }

    /// True if this node was inserted by error recovery rather than parsed
    /// from the source.
    pub fn is_missing(&self) -> bool {
        unsafe { ffi::ts_node_is_missing(self.0) }
    }

    /// Byte offset where this node's text starts.
    pub fn start_byte(&self) -> usize {
        unsafe { ffi::ts_node_start_byte(self.0) as usize }
    }

    /// Byte offset just past this node's text.
    pub fn end_byte(&self) -> usize {
        unsafe { ffi::ts_node_end_byte(self.0) as usize }
    }

    /// Returns this node's full byte and row/column extent.
    pub fn range(&self) -> Range {
        Range {
            start_byte: self.start_byte(),
            end_byte: self.end_byte(),
            start_point: self.start_position(),
            end_point: self.end_position(),
        }
    }

    /// Row/column position where this node starts.
    pub fn start_position(&self) -> Point {
        let result = unsafe { ffi::ts_node_start_point(self.0) };
        result.into()
    }

    /// Row/column position where this node ends.
    pub fn end_position(&self) -> Point {
        let result = unsafe { ffi::ts_node_end_point(self.0) };
        result.into()
    }

    /// Returns the i-th child (named and anonymous), if any.
    pub fn child(&self, i: usize) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
    }

    /// Number of children, counting anonymous nodes.
    pub fn child_count(&self) -> usize {
        unsafe { ffi::ts_node_child_count(self.0) as usize }
    }

    /// Iterates over all children in order.
    pub fn children<'a>(&'a self) -> impl Iterator<Item = Node<'tree>> + 'a {
        (0..self.child_count())
            .into_iter()
            .map(move |i| self.child(i).unwrap())
    }

    /// Returns the i-th *named* child, if any.
    pub fn named_child<'a>(&'a self, i: usize) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) })
    }

    /// Number of named children.
    pub fn named_child_count(&self) -> usize {
        unsafe { ffi::ts_node_named_child_count(self.0) as usize }
    }

    /// Returns the parent node, or None at the root.
    pub fn parent(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_parent(self.0) })
    }

    /// Returns the next sibling (named or anonymous), if any.
    pub fn next_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_next_sibling(self.0) })
    }

    /// Returns the previous sibling (named or anonymous), if any.
    pub fn prev_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) })
    }

    /// Returns the next *named* sibling, if any.
    pub fn next_named_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) })
    }

    /// Returns the previous *named* sibling, if any.
    pub fn prev_named_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) })
    }

    /// Smallest descendant spanning the byte range [start, end].
    pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32)
        })
    }

    /// Smallest *named* descendant spanning the byte range [start, end].
    pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32)
        })
    }

    /// Smallest descendant spanning the given row/column range.
    pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into())
        })
    }

    /// Smallest *named* descendant spanning the given row/column range.
    pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into())
        })
    }

    /// Renders this subtree as an S-expression string.
    pub fn to_sexp(&self) -> String {
        let c_string = unsafe { ffi::ts_node_string(self.0) };
        let result = unsafe { CStr::from_ptr(c_string) }
            .to_str()
            .unwrap()
            .to_string();
        // ts_node_string heap-allocates; free it after copying into a String.
        unsafe { free_ptr(c_string as *mut c_void) };
        result
    }

    /// Slices this node's text out of the UTF-8 `source` it was parsed from.
    pub fn utf8_text<'a>(&self, source: &'a str) -> Result<&'a str, str::Utf8Error> {
        str::from_utf8(&source.as_bytes()[self.start_byte()..self.end_byte()])
    }

    /// Slices this node's text out of the UTF-16 `source` it was parsed from.
    // NOTE(review): indexes the u16 slice with *byte* offsets, but each code
    // unit is 2 bytes — confirm whether offsets should be halved here.
    pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] {
        &source[self.start_byte()..self.end_byte()]
    }

    /// Returns a cursor positioned at this node.
    pub fn walk(&self) -> TreeCursor<'tree> {
        TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData)
    }

    /// Adjusts this node's cached positions to account for a source edit.
    pub fn edit(&mut self, edit: &InputEdit) {
        let edit = edit.into();
        unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) }
    }
}
|
||||
|
||||
impl<'a> PartialEq for Node<'a> {
    // Nodes are equal when they refer to the same underlying C node, as
    // identified by the id pointer in the TSNode struct.
    fn eq(&self, other: &Self) -> bool {
        self.0.id == other.0.id
    }
}
|
||||
|
||||
impl<'a> fmt::Debug for Node<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(
|
||||
f,
|
||||
"{{Node {} {} - {}}}",
|
||||
self.kind(),
|
||||
self.start_position(),
|
||||
self.end_position()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TreeCursor<'a> {
|
||||
pub fn node(&self) -> Node<'a> {
|
||||
Node(
|
||||
unsafe { ffi::ts_tree_cursor_current_node(&self.0) },
|
||||
PhantomData,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn goto_first_child(&mut self) -> bool {
|
||||
return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) };
|
||||
}
|
||||
|
||||
pub fn goto_parent(&mut self) -> bool {
|
||||
return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) };
|
||||
}
|
||||
|
||||
pub fn goto_next_sibling(&mut self) -> bool {
|
||||
return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) };
|
||||
}
|
||||
|
||||
pub fn goto_first_child_for_index(&mut self, index: usize) -> Option<usize> {
|
||||
let result =
|
||||
unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) };
|
||||
if result < 0 {
|
||||
None
|
||||
} else {
|
||||
Some(result as usize)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset(&mut self, node: Node<'a>) {
|
||||
unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) };
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for TreeCursor<'a> {
    fn drop(&mut self) {
        // Frees the cursor's internal C-side storage.
        unsafe { ffi::ts_tree_cursor_delete(&mut self.0) }
    }
}
|
||||
|
||||
impl<'a, P> TreePropertyCursor<'a, P> {
    /// Creates a cursor at the tree's root.
    ///
    /// The two stacks mirror the cursor's path: `state_stack` holds the
    /// property-sheet state for each ancestor (seeded with state 0, then the
    /// state reached by entering the root), and `child_index_stack` holds
    /// each ancestor's index within its parent.
    fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<P>, source: &'a str) -> Self {
        let mut result = Self {
            cursor: tree.root_node().walk(),
            child_index_stack: vec![0],
            state_stack: vec![0],
            property_sheet,
            source,
        };
        // Enter the root node: transition out of the initial state using the
        // root's kind, treating the root as child 0.
        let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0);
        result.state_stack.push(state);
        result
    }

    /// Returns the node the cursor currently points at.
    pub fn node(&self) -> Node<'a> {
        self.cursor.node()
    }

    /// Returns the property set associated with the current node by the
    /// property sheet's current state.
    pub fn node_properties(&self) -> &'a P {
        &self.property_sheet.property_sets[self.current_state().property_set_id]
    }

    /// Descends to the first child, pushing the transition target onto the
    /// state stack; false if the node has no children.
    pub fn goto_first_child(&mut self) -> bool {
        if self.cursor.goto_first_child() {
            let child_index = 0;
            let next_state_id = {
                let state = &self.current_state();
                let kind_id = self.cursor.node().kind_id();
                self.next_state(state, kind_id, child_index)
            };
            self.state_stack.push(next_state_id);
            self.child_index_stack.push(child_index);
            true
        } else {
            false
        }
    }

    /// Moves to the next sibling. Pops the current node's state first so
    /// the transition is taken from the shared parent's state, then pushes
    /// the sibling's state and incremented child index.
    pub fn goto_next_sibling(&mut self) -> bool {
        if self.cursor.goto_next_sibling() {
            let child_index = self.child_index_stack.pop().unwrap() + 1;
            self.state_stack.pop();
            let next_state_id = {
                let state = &self.current_state();
                let kind_id = self.cursor.node().kind_id();
                self.next_state(state, kind_id, child_index)
            };
            self.state_stack.push(next_state_id);
            self.child_index_stack.push(child_index);
            true
        } else {
            false
        }
    }

    /// Moves up to the parent, discarding the current node's stack entries;
    /// false at the root.
    pub fn goto_parent(&mut self) -> bool {
        if self.cursor.goto_parent() {
            self.state_stack.pop();
            self.child_index_stack.pop();
            true
        } else {
            false
        }
    }

    /// Computes the state reached from `state` when entering a node of the
    /// given kind at the given child index.
    ///
    /// Scans the transitions registered for the node's kind in order; a
    /// transition may additionally be gated on a text regex (matched against
    /// the node's source text) and/or a specific child index. Falls back to
    /// the state's default successor when nothing matches.
    fn next_state(
        &self,
        state: &PropertyState,
        node_kind_id: u16,
        node_child_index: usize,
    ) -> usize {
        state
            .transitions
            .get(&node_kind_id)
            .and_then(|transitions| {
                for transition in transitions.iter() {
                    if let Some(text_regex_index) = transition.text_regex_index {
                        let node = self.cursor.node();
                        let text = &self.source.as_bytes()[node.start_byte()..node.end_byte()];
                        // Non-UTF-8 node text skips the regex check rather
                        // than rejecting the transition.
                        if let Ok(text) = str::from_utf8(text) {
                            if !self.property_sheet.text_regexes[text_regex_index].is_match(text) {
                                continue;
                            }
                        }
                    }

                    if let Some(child_index) = transition.child_index {
                        if child_index != node_child_index {
                            continue;
                        }
                    }

                    return Some(transition.state_id);
                }
                None
            })
            .unwrap_or(state.default_next_state_id)
    }

    /// Returns the property-sheet state for the current node (top of stack).
    fn current_state(&self) -> &PropertyState {
        &self.property_sheet.states[*self.state_stack.last().unwrap()]
    }
}
|
||||
|
||||
impl Point {
|
||||
pub fn new(row: usize, column: usize) -> Self {
|
||||
Point { row, column }
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Point {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "({}, {})", self.row, self.column)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<ffi::TSPoint> for Point {
|
||||
fn into(self) -> ffi::TSPoint {
|
||||
ffi::TSPoint {
|
||||
row: self.row as u32,
|
||||
column: self.column as u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ffi::TSPoint> for Point {
    /// Converts from the C point struct, widening coordinates to usize.
    fn from(raw: ffi::TSPoint) -> Self {
        let ffi::TSPoint { row, column } = raw;
        Self {
            row: row as usize,
            column: column as usize,
        }
    }
}
|
||||
|
||||
impl Into<ffi::TSRange> for Range {
|
||||
fn into(self) -> ffi::TSRange {
|
||||
ffi::TSRange {
|
||||
start_byte: self.start_byte as u32,
|
||||
end_byte: self.end_byte as u32,
|
||||
start_point: self.start_point.into(),
|
||||
end_point: self.end_point.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ffi::TSRange> for Range {
    /// Converts from the C range struct, widening byte offsets to usize.
    fn from(raw: ffi::TSRange) -> Self {
        let ffi::TSRange {
            start_byte,
            end_byte,
            start_point,
            end_point,
        } = raw;
        Self {
            start_byte: start_byte as usize,
            end_byte: end_byte as usize,
            start_point: start_point.into(),
            end_point: end_point.into(),
        }
    }
}
|
||||
|
||||
impl<'a> Into<ffi::TSInputEdit> for &'a InputEdit {
    /// Converts an edit description to the C struct, narrowing byte offsets
    /// to u32 and converting the three positions.
    fn into(self) -> ffi::TSInputEdit {
        let edit = self;
        ffi::TSInputEdit {
            start_byte: edit.start_byte as u32,
            old_end_byte: edit.old_end_byte as u32,
            new_end_byte: edit.new_end_byte as u32,
            start_point: edit.start_position.into(),
            old_end_point: edit.old_end_position.into(),
            new_end_point: edit.new_end_position.into(),
        }
    }
}
|
||||
|
||||
impl<P> PropertySheet<P> {
|
||||
pub fn new(language: Language, json: &str) -> Result<Self, PropertySheetError>
|
||||
where
|
||||
P: DeserializeOwned,
|
||||
{
|
||||
let input: PropertySheetJSON<P> =
|
||||
serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?;
|
||||
let mut states = Vec::new();
|
||||
let mut text_regexes = Vec::new();
|
||||
let mut text_regex_patterns = Vec::new();
|
||||
|
||||
for state in input.states.iter() {
|
||||
let mut transitions = HashMap::new();
|
||||
let node_kind_count = language.node_kind_count();
|
||||
for transition in state.transitions.iter() {
|
||||
let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() {
|
||||
if let Some(index) =
|
||||
text_regex_patterns.iter().position(|r| *r == regex_pattern)
|
||||
{
|
||||
Some(index)
|
||||
} else {
|
||||
text_regex_patterns.push(regex_pattern);
|
||||
text_regexes.push(
|
||||
Regex::new(®ex_pattern).map_err(PropertySheetError::InvalidRegex)?,
|
||||
);
|
||||
Some(text_regexes.len() - 1)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
for i in 0..(node_kind_count as u16) {
|
||||
if transition.kind == language.node_kind_for_id(i)
|
||||
&& transition.named == language.node_kind_is_named(i)
|
||||
{
|
||||
let entry = transitions.entry(i).or_insert(Vec::new());
|
||||
entry.push(PropertyTransition {
|
||||
child_index: transition.index,
|
||||
state_id: transition.state_id,
|
||||
text_regex_index,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
states.push(PropertyState {
|
||||
transitions,
|
||||
default_next_state_id: state.default_next_state_id,
|
||||
property_set_id: state.property_set_id,
|
||||
});
|
||||
}
|
||||
Ok(Self {
|
||||
property_sets: input.property_sets,
|
||||
states,
|
||||
text_regexes,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn map<F, T, E>(self, mut f: F) -> Result<PropertySheet<T>, E>
|
||||
where
|
||||
F: FnMut(P) -> Result<T, E>,
|
||||
{
|
||||
let mut property_sets = Vec::with_capacity(self.property_sets.len());
|
||||
for set in self.property_sets {
|
||||
property_sets.push(f(set)?);
|
||||
}
|
||||
Ok(PropertySheet {
|
||||
states: self.states,
|
||||
text_regexes: self.text_regexes,
|
||||
property_sets,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
    // Frees memory that was allocated by the C library (e.g. the buffers
    // returned by ts_node_string and ts_tree_get_changed_ranges). Bound to a
    // helper symbol so the correct allocator is used across the FFI boundary.
    #[link_name = "rust_tree_sitter_free"]
    fn free_ptr(ptr: *mut c_void);
}
|
||||
59
lib/build.rs
Normal file
59
lib/build.rs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
extern crate cc;
|
||||
|
||||
use std::{env, fs};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS");
|
||||
if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() {
|
||||
if let (Some(clang_path), Some(scan_build_path)) = (which("clang"), which("scan-build")) {
|
||||
let clang_path = clang_path.to_str().unwrap();
|
||||
let scan_build_path = scan_build_path.to_str().unwrap();
|
||||
env::set_var(
|
||||
"CC",
|
||||
&format!(
|
||||
"{} -analyze-headers --use-analyzer={} cc",
|
||||
scan_build_path, clang_path
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mut config = cc::Build::new();
|
||||
|
||||
println!("cargo:rerun-if-env-changed=TREE_SITTER_TEST");
|
||||
if env::var("TREE_SITTER_TEST").is_ok() {
|
||||
config.define("TREE_SITTER_TEST", "");
|
||||
}
|
||||
|
||||
let src_path = Path::new("src");
|
||||
|
||||
for entry in fs::read_dir(&src_path).unwrap() {
|
||||
let entry = entry.unwrap();
|
||||
let path = src_path.join(entry.file_name());
|
||||
println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
|
||||
}
|
||||
|
||||
config
|
||||
.define("UTF8PROC_STATIC", "")
|
||||
.flag_if_supported("-std=c99")
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.include("include")
|
||||
.include("utf8proc")
|
||||
.file(src_path.join("lib.c"))
|
||||
.file(Path::new("binding").join("helper.c"))
|
||||
.compile("tree-sitter");
|
||||
}
|
||||
|
||||
/// Searches the PATH environment variable for an executable file with the
/// given name, returning its full path if found.
fn which(exe_name: impl AsRef<Path>) -> Option<PathBuf> {
    env::var_os("PATH").and_then(|paths| {
        env::split_paths(&paths)
            .map(|dir| dir.join(&exe_name))
            .find(|candidate| candidate.is_file())
    })
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef TREE_SITTER_RUNTIME_H_
|
||||
#define TREE_SITTER_RUNTIME_H_
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
#define TREE_SITTER_API_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -83,7 +83,7 @@ const TSLanguage *ts_parser_language(const TSParser *);
|
|||
bool ts_parser_set_language(TSParser *, const TSLanguage *);
|
||||
TSLogger ts_parser_logger(const TSParser *);
|
||||
void ts_parser_set_logger(TSParser *, TSLogger);
|
||||
void ts_parser_print_dot_graphs(TSParser *, FILE *);
|
||||
void ts_parser_print_dot_graphs(TSParser *, int);
|
||||
void ts_parser_halt_on_error(TSParser *, bool);
|
||||
TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput);
|
||||
TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t);
|
||||
|
|
@ -153,4 +153,4 @@ uint32_t ts_language_version(const TSLanguage *);
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_RUNTIME_H_
|
||||
#endif // TREE_SITTER_API_H_
|
||||
|
|
@ -13,7 +13,7 @@ extern "C" {
|
|||
#define ts_builtin_sym_end 0
|
||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||
|
||||
#ifndef TREE_SITTER_RUNTIME_H_
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
#endif
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef RUNTIME_ALLOC_H_
|
||||
#define RUNTIME_ALLOC_H_
|
||||
#ifndef TREE_SITTER_ALLOC_H_
|
||||
#define TREE_SITTER_ALLOC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -82,4 +82,4 @@ static inline void ts_free(void *buffer) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_ALLOC_H_
|
||||
#endif // TREE_SITTER_ALLOC_H_
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef RUNTIME_ARRAY_H_
|
||||
#define RUNTIME_ARRAY_H_
|
||||
#ifndef TREE_SITTER_ARRAY_H_
|
||||
#define TREE_SITTER_ARRAY_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -10,7 +10,7 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include "runtime/alloc.h"
|
||||
#include "./alloc.h"
|
||||
|
||||
#define Array(T) \
|
||||
struct { \
|
||||
|
|
@ -132,4 +132,4 @@ static inline void array__splice(VoidArray *self, size_t element_size,
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_ARRAY_H_
|
||||
#endif // TREE_SITTER_ARRAY_H_
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef RUNTIME_ATOMIC_H_
|
||||
#define RUNTIME_ATOMIC_H_
|
||||
#ifndef TREE_SITTER_ATOMIC_H_
|
||||
#define TREE_SITTER_ATOMIC_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
|
@ -27,4 +27,4 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) {
|
|||
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_ATOMIC_H_
|
||||
#endif // TREE_SITTER_ATOMIC_H_
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef RUNTIME_ERROR_COSTS_H_
|
||||
#define RUNTIME_ERROR_COSTS_H_
|
||||
#ifndef TREE_SITTER_ERROR_COSTS_H_
|
||||
#define TREE_SITTER_ERROR_COSTS_H_
|
||||
|
||||
#define ERROR_STATE 0
|
||||
#define ERROR_COST_PER_RECOVERY 500
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
#include "runtime/get_changed_ranges.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "runtime/language.h"
|
||||
#include "runtime/error_costs.h"
|
||||
#include "runtime/tree_cursor.h"
|
||||
#include "./get_changed_ranges.h"
|
||||
#include "./subtree.h"
|
||||
#include "./language.h"
|
||||
#include "./error_costs.h"
|
||||
#include "./tree_cursor.h"
|
||||
#include <assert.h>
|
||||
|
||||
// #define DEBUG_GET_CHANGED_RANGES
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
#ifndef RUNTIME_GET_CHANGED_RANGES_H_
|
||||
#define RUNTIME_GET_CHANGED_RANGES_H_
|
||||
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
|
||||
#define TREE_SITTER_GET_CHANGED_RANGES_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "runtime/tree_cursor.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "./tree_cursor.h"
|
||||
#include "./subtree.h"
|
||||
|
||||
typedef Array(TSRange) TSRangeArray;
|
||||
|
||||
|
|
@ -33,4 +33,4 @@ unsigned ts_subtree_get_changed_ranges(
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_GET_CHANGED_RANGES_H_
|
||||
#endif // TREE_SITTER_GET_CHANGED_RANGES_H_
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
#include "runtime/language.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "runtime/error_costs.h"
|
||||
#include "./language.h"
|
||||
#include "./subtree.h"
|
||||
#include "./error_costs.h"
|
||||
#include <string.h>
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
#ifndef RUNTIME_LANGUAGE_H_
|
||||
#define RUNTIME_LANGUAGE_H_
|
||||
#ifndef TREE_SITTER_LANGUAGE_H_
|
||||
#define TREE_SITTER_LANGUAGE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "runtime/subtree.h"
|
||||
#include "./subtree.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
|
||||
|
|
@ -91,4 +91,4 @@ ts_language_alias_sequence(const TSLanguage *self, unsigned id) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_LANGUAGE_H_
|
||||
#endif // TREE_SITTER_LANGUAGE_H_
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
#ifndef RUNTIME_LENGTH_H_
|
||||
#define RUNTIME_LENGTH_H_
|
||||
#ifndef TREE_SITTER_LENGTH_H_
|
||||
#define TREE_SITTER_LENGTH_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "runtime/point.h"
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "./point.h"
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t bytes;
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
#include <stdio.h>
|
||||
#include "runtime/lexer.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "runtime/length.h"
|
||||
#include "runtime/utf16.h"
|
||||
#include "./lexer.h"
|
||||
#include "./subtree.h"
|
||||
#include "./length.h"
|
||||
#include "./utf16.h"
|
||||
#include "utf8proc.h"
|
||||
|
||||
#define LOG(...) \
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
#ifndef RUNTIME_LEXER_H_
|
||||
#define RUNTIME_LEXER_H_
|
||||
#ifndef TREE_SITTER_LEXER_H_
|
||||
#define TREE_SITTER_LEXER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "runtime/length.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "./length.h"
|
||||
#include "./subtree.h"
|
||||
#include "tree_sitter/api.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -45,4 +45,4 @@ TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_LEXER_H_
|
||||
#endif // TREE_SITTER_LEXER_H_
|
||||
18
lib/src/lib.c
Normal file
18
lib/src/lib.c
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
// The Tree-sitter library can be built by compiling this one source file.
|
||||
//
|
||||
// The following directories must be added to the include path:
|
||||
// - src
|
||||
// - include
|
||||
// - externals/utf8proc
|
||||
|
||||
#include "./get_changed_ranges.c"
|
||||
#include "./language.c"
|
||||
#include "./lexer.c"
|
||||
#include "./node.c"
|
||||
#include "./parser.c"
|
||||
#include "./stack.c"
|
||||
#include "./subtree.c"
|
||||
#include "./tree_cursor.c"
|
||||
#include "./tree.c"
|
||||
#include "./utf16.c"
|
||||
#include "utf8proc.c"
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
#include <stdbool.h>
|
||||
#include "runtime/subtree.h"
|
||||
#include "runtime/tree.h"
|
||||
#include "runtime/language.h"
|
||||
#include "./subtree.h"
|
||||
#include "./tree.h"
|
||||
#include "./language.h"
|
||||
|
||||
typedef struct {
|
||||
Subtree parent;
|
||||
|
|
@ -2,19 +2,19 @@
|
|||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "runtime/lexer.h"
|
||||
#include "runtime/length.h"
|
||||
#include "runtime/array.h"
|
||||
#include "runtime/language.h"
|
||||
#include "runtime/alloc.h"
|
||||
#include "runtime/stack.h"
|
||||
#include "runtime/reusable_node.h"
|
||||
#include "runtime/reduce_action.h"
|
||||
#include "runtime/error_costs.h"
|
||||
#include "runtime/get_changed_ranges.h"
|
||||
#include "runtime/tree.h"
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./subtree.h"
|
||||
#include "./lexer.h"
|
||||
#include "./length.h"
|
||||
#include "./array.h"
|
||||
#include "./language.h"
|
||||
#include "./alloc.h"
|
||||
#include "./stack.h"
|
||||
#include "./reusable_node.h"
|
||||
#include "./reduce_action.h"
|
||||
#include "./error_costs.h"
|
||||
#include "./get_changed_ranges.h"
|
||||
#include "./tree.h"
|
||||
|
||||
#define LOG(...) \
|
||||
if (self->lexer.logger.log || self->dot_graph_file) { \
|
||||
|
|
@ -28,10 +28,10 @@
|
|||
fputs("\n\n", self->dot_graph_file); \
|
||||
}
|
||||
|
||||
#define LOG_TREE() \
|
||||
if (self->dot_graph_file) { \
|
||||
ts_subtree_print_dot_graph(self->finished_tree, self->language, self->dot_graph_file); \
|
||||
fputs("\n", self->dot_graph_file); \
|
||||
#define LOG_TREE(tree) \
|
||||
if (self->dot_graph_file) { \
|
||||
ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
|
||||
fputs("\n", self->dot_graph_file); \
|
||||
}
|
||||
|
||||
#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
|
||||
|
|
@ -417,6 +417,13 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
|
|||
parse_state,
|
||||
self->language
|
||||
);
|
||||
|
||||
LOG(
|
||||
"lexed_lookahead sym:%s, size:%u, character:'%c'",
|
||||
SYM_NAME(ts_subtree_symbol(result)),
|
||||
ts_subtree_total_size(result).bytes,
|
||||
first_error_character
|
||||
);
|
||||
} else {
|
||||
if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
|
||||
self->lexer.token_start_position = self->lexer.token_end_position;
|
||||
|
|
@ -467,13 +474,14 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa
|
|||
length
|
||||
);
|
||||
}
|
||||
|
||||
LOG(
|
||||
"lexed_lookahead sym:%s, size:%u",
|
||||
SYM_NAME(ts_subtree_symbol(result)),
|
||||
ts_subtree_total_size(result).bytes
|
||||
);
|
||||
}
|
||||
|
||||
LOG(
|
||||
"lexed_lookahead sym:%s, size:%u",
|
||||
SYM_NAME(ts_subtree_symbol(result)),
|
||||
ts_subtree_total_size(result).bytes
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -1355,7 +1363,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
|
|||
);
|
||||
|
||||
MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
|
||||
ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token);
|
||||
ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
|
||||
lookahead = ts_subtree_from_mut(mutable_lookahead);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1542,8 +1550,16 @@ void ts_parser_set_logger(TSParser *self, TSLogger logger) {
|
|||
self->lexer.logger = logger;
|
||||
}
|
||||
|
||||
void ts_parser_print_dot_graphs(TSParser *self, FILE *file) {
|
||||
self->dot_graph_file = file;
|
||||
void ts_parser_print_dot_graphs(TSParser *self, int fd) {
|
||||
if (self->dot_graph_file) {
|
||||
fclose(self->dot_graph_file);
|
||||
}
|
||||
|
||||
if (fd >= 0) {
|
||||
self->dot_graph_file = fdopen(fd, "a");
|
||||
} else {
|
||||
self->dot_graph_file = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) {
|
||||
|
|
@ -1615,6 +1631,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
|
|||
);
|
||||
reusable_node_reset(&self->reusable_node, old_tree->root);
|
||||
LOG("parse_after_edit");
|
||||
LOG_TREE(self->old_tree);
|
||||
for (unsigned i = 0; i < self->included_range_differences.size; i++) {
|
||||
TSRange *range = &self->included_range_differences.contents[i];
|
||||
LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
|
||||
|
|
@ -1673,7 +1690,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
|
|||
|
||||
ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
|
||||
LOG("done");
|
||||
LOG_TREE();
|
||||
LOG_TREE(self->finished_tree);
|
||||
|
||||
TSTree *result = ts_tree_new(
|
||||
self->finished_tree,
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef RUNTIME_POINT_H_
|
||||
#define RUNTIME_POINT_H_
|
||||
#ifndef TREE_SITTER_POINT_H_
|
||||
#define TREE_SITTER_POINT_H_
|
||||
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
|
||||
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
#ifndef RUNTIME_REDUCE_ACTION_H_
|
||||
#define RUNTIME_REDUCE_ACTION_H_
|
||||
#ifndef TREE_SITTER_REDUCE_ACTION_H_
|
||||
#define TREE_SITTER_REDUCE_ACTION_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "runtime/array.h"
|
||||
#include "tree_sitter/runtime.h"
|
||||
#include "./array.h"
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
|
|
@ -31,4 +31,4 @@ static inline void ts_reduce_action_set_add(ReduceActionSet *self,
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // RUNTIME_REDUCE_ACTION_H_
|
||||
#endif // TREE_SITTER_REDUCE_ACTION_H_
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
#include "runtime/subtree.h"
|
||||
#include "./subtree.h"
|
||||
|
||||
typedef struct {
|
||||
Subtree tree;
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
#include "runtime/alloc.h"
|
||||
#include "runtime/language.h"
|
||||
#include "runtime/subtree.h"
|
||||
#include "runtime/array.h"
|
||||
#include "runtime/stack.h"
|
||||
#include "runtime/length.h"
|
||||
#include "./alloc.h"
|
||||
#include "./language.h"
|
||||
#include "./subtree.h"
|
||||
#include "./array.h"
|
||||
#include "./stack.h"
|
||||
#include "./length.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue