From a4c4b85a16ce0ecbb550d6de47801d2e387e629b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 Dec 2018 12:50:12 -0800 Subject: [PATCH] Initial commit --- .gitignore | 2 + Cargo.lock | 812 ++++++++++++++++++ Cargo.toml | 17 + src/build_tables/item.rs | 22 + src/build_tables/mod.rs | 34 + src/error.rs | 13 + src/generate.rs | 26 + src/grammars.rs | 98 +++ src/main.rs | 35 + src/parse_grammar.rs | 153 ++++ src/prepare_grammar/expand_repeats.rs | 220 +++++ src/prepare_grammar/extract_simple_aliases.rs | 10 + src/prepare_grammar/extract_tokens.rs | 7 + src/prepare_grammar/flatten_grammar.rs | 7 + src/prepare_grammar/intern_symbols.rs | 237 +++++ src/prepare_grammar/mod.rs | 40 + src/prepare_grammar/normalize_rules.rs | 5 + src/render/mod.rs | 16 + src/rules.rs | 205 +++++ src/tables.rs | 77 ++ 20 files changed, 2036 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/build_tables/item.rs create mode 100644 src/build_tables/mod.rs create mode 100644 src/error.rs create mode 100644 src/generate.rs create mode 100644 src/grammars.rs create mode 100644 src/main.rs create mode 100644 src/parse_grammar.rs create mode 100644 src/prepare_grammar/expand_repeats.rs create mode 100644 src/prepare_grammar/extract_simple_aliases.rs create mode 100644 src/prepare_grammar/extract_tokens.rs create mode 100644 src/prepare_grammar/flatten_grammar.rs create mode 100644 src/prepare_grammar/intern_symbols.rs create mode 100644 src/prepare_grammar/mod.rs create mode 100644 src/prepare_grammar/normalize_rules.rs create mode 100644 src/render/mod.rs create mode 100644 src/rules.rs create mode 100644 src/tables.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..53eaa219 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..20908681 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,812 @@ +[[package]] +name = "aho-corasick" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "argon2rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", + "scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "arrayvec" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "atty" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace-sys" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bitflags" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bitvec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "blake2-rfc" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cc" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cfg-if" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "clap" +version = "2.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "crossbeam-channel" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-utils" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "crossbeam-utils" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "dirs" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "failure" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "failure_derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fnv" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "globset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ignore" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.44" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libloading" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", + "vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "linked-hash-map" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lock_api" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "log" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lru-cache" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memchr" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memoffset" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "nodrop" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "owning_ref" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pkg-config" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "proc-macro2" +version = "0.4.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quote" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_syscall" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "redox_users" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rusqlite" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)", + "lru-cache 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rust-tree-sitter-cli" +version = "0.1.0" +dependencies = [ + "bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "same-file" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "scoped_threadpool" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "scopeguard" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_derive" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "smallvec" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "stable_deref_trait" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "strsim" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "syn" +version = "0.15.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "synstructure" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "termion" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "textwrap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "time" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tree-sitter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ucd-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "vcpkg" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "walkdir" +version = "2.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-util" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3f67b0b6a86dae6e67ff4ca2b6201396074996379fba2b92ff649126f37cb392" +"checksum arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f405cc4c21cd8b784f6c8fc2adf9bc00f59558f0049b5ec21517f875963040cc" +"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" +"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" +"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" +"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e37e2176261200377c7cde4c6de020394174df556c356f965e4bc239f5ce1c5a" +"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" +"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" +"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" +"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" +"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" +"checksum crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7b85741761b7f160bc5e7e0c14986ef685b7f8bf9b7ad081c60c604bb4649827" +"checksum crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2449aaa4ec7ef96e5fb24db16024b935df718e9ae1cec0a1e68feeca2efca7b8" +"checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" +"checksum crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c55913cc2799171a550e307918c0a360e8c16004820291bf3b638969b4a01816" +"checksum dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88972de891f6118092b643d85a0b28e0678e0f948d7f879aa32f2d5aafe97d2a" +"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" +"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" +"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" +"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +"checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865" +"checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01" +"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" +"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" +"checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" +"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" +"checksum libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d3711dfd91a1081d2458ad2d06ea30a8755256e74038be2ad927d94e1c955ca8" +"checksum linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7860ec297f7008ff7a1e3382d7f7e1dcd69efc94751a2284bafc3d013c2aa939" +"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" +"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" +"checksum lru-cache 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4d06ff7ff06f729ce5f4e227876cb88d10bc59cd4ae1e09fbb2bde15c850dc21" +"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" +"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" +"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" +"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" +"checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" +"checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c" +"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" +"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" +"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" +"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" +"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" +"checksum rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1961a422c4d189dfb50ffa9320bf1f2a9bd54ecb92792fb9477f99a1045f3372" +"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" +"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +"checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26" +"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" +"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" +"checksum rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c9d9118f1ce84d8d0b67f9779936432fb42bb620cef2122409d786892cce9a3c" +"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" +"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" +"checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267" +"checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" +"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" +"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" +"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" +"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" +"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" +"checksum smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b73ea3738b47563803ef814925e69be00799a8c07420be8b996f8e98fb2336db" +"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" +"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" +"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" +"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" +"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" +"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" +"checksum tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "311adf1e004ac816285a1196c93ea36364857c3adc37ffc9fd5ed0d70545391a" +"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" +"checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" +"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" +"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1" +"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..965cc81e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "rust-tree-sitter-cli" +version = "0.1.0" +authors = ["Max Brunsfeld "] +edition = "2018" + +[dependencies] +bitvec = "0.8" +clap = "2.32" +dirs = "1.0.2" +ignore = "0.4.4" +libloading = "0.5" +rusqlite = "0.14.0" +serde = "1.0" +serde_derive = "1.0" +serde_json = "1.0" +tree-sitter = "0.3.1" diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs new file mode 100644 index 00000000..c8d30997 --- /dev/null +++ b/src/build_tables/item.rs @@ -0,0 +1,22 @@ +use crate::grammars::Production; +use std::collections::HashMap; +use bitvec::BitVec; + +#[derive(Debug, PartialEq, Eq)] +pub(super) struct LookaheadSet { + terminal_bits: BitVec, + external_bits: BitVec, + eof: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub(super) struct ParseItem { + variable_index: u32, + production_index: u32, + step_index: u32, +} + +#[derive(Debug, PartialEq, Eq)] +pub(super) struct ParseItemSet { + entries: HashMap +} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs new file mode 100644 index 00000000..c5dd5b54 --- /dev/null +++ b/src/build_tables/mod.rs @@ -0,0 +1,34 @@ +mod item; + +use std::collections::{HashMap, VecDeque}; +use crate::grammars::{SyntaxGrammar, LexicalGrammar}; +use crate::tables::{ParseTable, LexTable, ParseStateId}; +use crate::rules::{AliasMap, Symbol}; +use crate::error::Result; +use self::item::ParseItemSet; + +type SymbolSequence = Vec; + +struct ParseStateQueueEntry { + preceding_symbols: SymbolSequence, + item_set: ParseItemSet, + state_id: ParseStateId, +} + +struct ParseTableBuilder<'a> { + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + simple_aliases: &'a AliasMap, + state_ids_by_item_set: HashMap, + item_sets_by_state_id: Vec<&'a ParseItemSet>, + parse_state_queue: VecDeque, + parse_table: ParseTable, +} + +pub fn build_tables( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + simple_aliases: &AliasMap +) -> Result<(ParseTable, LexTable, LexTable, Option)> { + unimplemented!(); +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..90e7b8f9 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,13 @@ +#[derive(Debug)] +pub enum Error { + GrammarError(String), + SymbolError(String), +} + +pub type Result = std::result::Result; + +impl From for Error { + fn from(error: serde_json::Error) -> Self { + Error::GrammarError(error.to_string()) + } +} diff --git a/src/generate.rs b/src/generate.rs new file mode 100644 index 00000000..4507fb6f --- /dev/null +++ b/src/generate.rs @@ -0,0 +1,26 @@ +use crate::error::Result; +use crate::parse_grammar::parse_grammar; +use crate::prepare_grammar::prepare_grammar; +use crate::build_tables::build_tables; +use crate::render::render_c_code; + +pub fn generate_parser_for_grammar(input: String) -> Result { + let input_grammar = parse_grammar(&input)?; + let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?; + let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( + &syntax_grammar, + &lexical_grammar, + &simple_aliases + )?; + let c_code = render_c_code( + &input_grammar.name, + parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + syntax_grammar, + lexical_grammar, + simple_aliases + ); + Ok(c_code) +} diff --git a/src/grammars.rs b/src/grammars.rs new file mode 100644 index 00000000..6f5b772e --- /dev/null +++ b/src/grammars.rs @@ -0,0 +1,98 @@ +use crate::rules::{Associativity, Alias, Rule, Symbol}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum VariableType { + Hidden, + Auxiliary, + Anonymous, + Named +} + +// Input grammar + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct InputVariable { + pub name: String, + pub kind: VariableType, + pub rule: Rule, +} + +#[derive(PartialEq, Eq)] +pub struct InputGrammar { + pub name: String, + pub variables: Vec, + pub extra_tokens: Vec, + pub expected_conflicts: Vec>, + pub external_tokens: Vec, + pub variables_to_inline: Vec, + pub word_token: Option, +} + +// Extracted lexical grammar + +#[derive(PartialEq, Eq)] +pub struct LexicalVariable { + name: String, + kind: VariableType, + rule: Rule, + is_string: bool, +} + +pub struct LexicalGrammar { + variables: Vec, + separators: Vec, +} + +// Extracted syntax grammar + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ProductionStep { + symbol: Symbol, + precedence: i32, + associativity: Option, + alias: Option, + is_excluded: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Production { + steps: Vec, + dynamic_precedence: i32, +} + +#[derive(Clone, PartialEq, Eq)] +pub struct SyntaxVariable { + name: String, + kind: VariableType, +} + +#[derive(Clone, PartialEq, Eq)] +pub struct ExternalToken { + name: String, + kind: VariableType, + corresponding_internal_token: Symbol, +} + +pub struct SyntaxGrammar { + variables: Vec, + extra_tokens: Vec, + expected_conflicts: Vec>, + external_tokens: Vec, + variables_to_inline: Vec, + word_token: Symbol, +} + +#[cfg(test)] +impl InputVariable { + pub fn named(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Named, rule } + } + + pub fn auxiliary(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Auxiliary, rule } + } + + pub fn hidden(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Hidden, rule } + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 00000000..3eeb306a --- /dev/null +++ b/src/main.rs @@ -0,0 +1,35 @@ +use clap::{App, Arg, SubCommand}; + +#[macro_use] extern crate serde_derive; +#[macro_use] extern crate serde_json; + +mod build_tables; +mod error; +mod generate; +mod grammars; +mod parse_grammar; +mod prepare_grammar; +mod render; +mod rules; +mod tables; + +fn main() { + let matches = App::new("tree-sitter") + .version("0.1") + .author("Max Brunsfeld ") + .about("Generates and tests parsers") + .subcommand( + SubCommand::with_name("generate") + .about("Generate a parser") + ).subcommand( + SubCommand::with_name("parse") + .about("Parse a file") + .arg(Arg::with_name("path").index(1)) + ).subcommand( + SubCommand::with_name("test") + .about("Run a parser's tests") + .arg(Arg::with_name("path").index(1).required(true)) + .arg(Arg::with_name("line").index(2).required(true)) + .arg(Arg::with_name("column").index(3).required(true)) + ); +} diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs new file mode 100644 index 00000000..4c21e5ba --- /dev/null +++ b/src/parse_grammar.rs @@ -0,0 +1,153 @@ +use serde_json::{Map, Value}; +use crate::error::Result; +use crate::grammars::{InputGrammar, InputVariable, VariableType}; +use crate::rules::Rule; +use std::collections::HashMap; + +#[derive(Deserialize)] +#[serde(tag = "type")] +#[allow(non_camel_case_types)] +pub enum RuleJSON { + BLANK, + STRING { + value: String, + }, + PATTERN { + value: String, + }, + SYMBOL { + name: String, + }, + CHOICE { + members: Vec, + }, + SEQ { + members: Vec, + }, + REPEAT { + content: Box, + }, + PREC_LEFT { + value: i32, + content: Box, + }, + PREC_RIGHT { + value: i32, + content: Box, + }, + PREC { + value: i32, + content: Box, + }, + TOKEN { + content: Box, + }, + TOKEN_IMMEDIATE { + content: Box, + }, +} + +#[derive(Deserialize)] +struct GrammarJSON { + name: String, + rules: Map, + conflicts: Option>>, + externals: Option>, + extras: Option>, + inline: Option>, + word: Option, +} + +pub fn parse_grammar(input: &str) -> Result { + let grammar_json: GrammarJSON = serde_json::from_str(&input)?; + + let mut variables = Vec::with_capacity(grammar_json.rules.len()); + for (name, value) in grammar_json.rules { + variables.push(InputVariable { + name: name.to_owned(), + kind: VariableType::Named, + rule: parse_rule(serde_json::from_value(value)?), + }) + } + + let extra_tokens = grammar_json.extras + .unwrap_or(Vec::new()) + .into_iter() + .map(parse_rule) + .collect(); + let external_tokens = grammar_json.externals + .unwrap_or(Vec::new()) + .into_iter() + .map(parse_rule) + .collect(); + let expected_conflicts = grammar_json.conflicts + .unwrap_or(Vec::new()); + let variables_to_inline = grammar_json.inline + .unwrap_or(Vec::new()); + + Ok(InputGrammar { + name: grammar_json.name, + word_token: grammar_json.word, + variables, + extra_tokens, + expected_conflicts, + external_tokens, + variables_to_inline, + }) +} + +fn parse_rule(json: RuleJSON) -> Rule { + match json { + RuleJSON::BLANK => Rule::Blank, + RuleJSON::STRING { value } => Rule::String(value), + RuleJSON::PATTERN { value } => Rule::Pattern(value), + RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), + RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), + RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), + RuleJSON::REPEAT { content } => Rule::repeat(parse_rule(*content)), + RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)), + RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)), + RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)), + RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)), + RuleJSON::TOKEN_IMMEDIATE { content } => Rule::immediate_token(parse_rule(*content)), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_grammar() { + let grammar = parse_grammar(&json!({ + "name": "my_lang", + "rules": { + "file": { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "statement" + } + }, + "statement": { + "type": "STRING", + "value": "foo" + } + } + }).to_string()).unwrap(); + + assert_eq!(grammar.name, "my_lang"); + assert_eq!(grammar.variables, vec![ + InputVariable { + name: "file".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())) + }, + InputVariable { + name: "statement".to_string(), + kind: VariableType::Named, + rule: Rule::String("foo".to_string()) + }, + ]); + } +} diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs new file mode 100644 index 00000000..69db150c --- /dev/null +++ b/src/prepare_grammar/expand_repeats.rs @@ -0,0 +1,220 @@ +use crate::rules::{Rule, Symbol}; +use crate::grammars::{InputVariable, VariableType}; +use std::collections::HashMap; +use std::mem; +use std::rc::Rc; +use super::ExtractedGrammar; + +struct Expander { + variable_name: String, + repeat_count_in_variable: usize, + preceding_symbol_count: usize, + auxiliary_variables: Vec, + existing_repeats: HashMap +} + +impl Expander { + fn expand_variable(&mut self, variable: &mut InputVariable) { + self.variable_name.clear(); + self.variable_name.push_str(&variable.name); + self.repeat_count_in_variable = 0; + let mut rule = Rule::Blank; + mem::swap(&mut rule, &mut variable.rule); + variable.rule = self.expand_rule(&rule); + } + + fn expand_rule(&mut self, rule: &Rule) -> Rule { + match rule { + Rule::Choice { elements } => + Rule::Choice { + elements: elements.iter().map(|element| self.expand_rule(element)).collect() + }, + + Rule::Seq { left, right } => + Rule::Seq { + left: Rc::new(self.expand_rule(left)), + right: Rc::new(self.expand_rule(right)), + }, + + Rule::Repeat(content) => { + let inner_rule = self.expand_rule(content); + + if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) { + return Rule::Symbol(*existing_symbol); + } + + self.repeat_count_in_variable += 1; + let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable); + let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len()); + let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol)); + self.existing_repeats.insert(inner_rule.clone(), repeat_symbol); + self.auxiliary_variables.push(InputVariable { + name: rule_name, + kind: VariableType::Auxiliary, + rule: Rule::Choice { + elements: vec![ + Rule::Seq { + left: rc_symbol.clone(), + right: rc_symbol + }, + inner_rule + ], + }, + }); + + Rule::Symbol(repeat_symbol) + } + + Rule::Metadata { rule, params } => Rule::Metadata { + rule: Rc::new(self.expand_rule(rule)), + params: params.clone() + }, + + _ => rule.clone() + } + } +} + +pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar { + let mut expander = Expander { + variable_name: String::new(), + repeat_count_in_variable: 0, + preceding_symbol_count: grammar.variables.len(), + auxiliary_variables: Vec::new(), + existing_repeats: HashMap::new(), + }; + + for mut variable in grammar.variables.iter_mut() { + expander.expand_variable(&mut variable); + } + + grammar.variables.extend(expander.auxiliary_variables.into_iter()); + grammar +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_repeat_expansion() { + // Repeats nested inside of sequences and choices are expanded. + let grammar = expand_repeats(build_grammar(vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::choice(vec![ + Rule::repeat(Rule::terminal(11)), + Rule::repeat(Rule::terminal(12)), + ]), + Rule::terminal(13), + ])), + ])); + + assert_eq!(grammar.variables, vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + ]), + Rule::terminal(13), + ])), + InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(1), + Rule::non_terminal(1), + ]), + Rule::terminal(11), + ])), + InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ]), + Rule::terminal(12), + ])), + ]); + } + + #[test] + fn test_repeat_deduplication() { + // Terminal 4 appears inside of a repeat in three different places. + let grammar = expand_repeats(build_grammar(vec![ + InputVariable::named("rule0", Rule::choice(vec![ + Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]), + Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]), + ])), + InputVariable::named("rule1", Rule::seq(vec![ + Rule::terminal(3), + Rule::repeat(Rule::terminal(4)), + ])), + ])); + + // Only one auxiliary rule is created for repeating terminal 4. + assert_eq!(grammar.variables, vec![ + InputVariable::named("rule0", Rule::choice(vec![ + Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]), + Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]), + ])), + InputVariable::named("rule1", Rule::seq(vec![ + Rule::terminal(3), + Rule::non_terminal(2), + ])), + InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ]), + Rule::terminal(4), + ])) + ]); + } + + #[test] + fn test_expansion_of_nested_repeats() { + let grammar = expand_repeats(build_grammar(vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::repeat(Rule::seq(vec![ + Rule::terminal(11), + Rule::repeat(Rule::terminal(12)) + ])), + ])), + ])); + + assert_eq!(grammar.variables, vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::non_terminal(2), + ])), + InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(1), + Rule::non_terminal(1), + ]), + Rule::terminal(12), + ])), + InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ]), + Rule::seq(vec![ + Rule::terminal(11), + Rule::non_terminal(1), + ]), + ])), + ]); + } + + fn build_grammar(variables: Vec) -> ExtractedGrammar { + ExtractedGrammar { + variables, + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + } + } +} diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs new file mode 100644 index 00000000..250246f3 --- /dev/null +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -0,0 +1,10 @@ +use crate::rules::AliasMap; +use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use super::ExtractedGrammar; + +pub(super) fn extract_simple_aliases( + syntax_grammar: &mut SyntaxGrammar, + lexical_grammar: &mut LexicalGrammar +) -> AliasMap { + unimplemented!(); +} diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs new file mode 100644 index 00000000..660d3819 --- /dev/null +++ b/src/prepare_grammar/extract_tokens.rs @@ -0,0 +1,7 @@ +use crate::error::Result; +use crate::grammars::LexicalGrammar; +use super::{InternedGrammar, ExtractedGrammar}; + +pub(super) fn extract_tokens(grammar: InternedGrammar) -> Result<(ExtractedGrammar, LexicalGrammar)> { + unimplemented!(); +} diff --git a/src/prepare_grammar/flatten_grammar.rs b/src/prepare_grammar/flatten_grammar.rs new file mode 100644 index 00000000..36fe76c9 --- /dev/null +++ b/src/prepare_grammar/flatten_grammar.rs @@ -0,0 +1,7 @@ +use crate::error::Result; +use crate::grammars::SyntaxGrammar; +use super::ExtractedGrammar; + +pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result { + unimplemented!(); +} diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs new file mode 100644 index 00000000..00a5c330 --- /dev/null +++ b/src/prepare_grammar/intern_symbols.rs @@ -0,0 +1,237 @@ +use crate::error::{Error, Result}; +use crate::rules::{Rule, Symbol}; +use crate::grammars::{InputGrammar, InputVariable, VariableType}; +use std::rc::Rc; +use super::InternedGrammar; + +pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result { + let interner = Interner { grammar }; + + if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden { + return Err(Error::GrammarError("Grammar's start rule must be visible".to_string())); + } + + let mut variables = Vec::with_capacity(grammar.variables.len()); + for variable in grammar.variables.iter() { + variables.push(InputVariable { + name: variable.name.clone(), + kind: variable_type_for_name(&variable.name), + rule: interner.intern_rule(&variable.rule)?, + }); + } + + let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len()); + for external_token in grammar.external_tokens.iter() { + let rule = interner.intern_rule(&external_token)?; + let (name, kind) = if let Rule::NamedSymbol(name) = external_token { + (name.clone(), variable_type_for_name(&name)) + } else { + (String::new(), VariableType::Anonymous) + }; + external_tokens.push(InputVariable { name, kind, rule }); + } + + let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len()); + for extra_token in grammar.extra_tokens.iter() { + extra_tokens.push(interner.intern_rule(extra_token)?); + } + + let mut expected_conflicts = Vec::new(); + for conflict in grammar.expected_conflicts.iter() { + let mut interned_conflict = Vec::with_capacity(conflict.len()); + for name in conflict { + interned_conflict.push(interner + .intern_name(&name) + .ok_or_else(|| symbol_error(name))? + ); + } + expected_conflicts.push(interned_conflict); + } + + let mut variables_to_inline = Vec::new(); + for name in grammar.variables_to_inline.iter() { + if let Some(symbol) = interner.intern_name(&name) { + variables_to_inline.push(symbol); + } + } + + let mut word_token = None; + if let Some(name) = grammar.word_token.as_ref() { + word_token = Some(interner + .intern_name(&name) + .ok_or_else(|| symbol_error(&name))? + ); + } + + Ok(InternedGrammar { + variables, + external_tokens, + extra_tokens, + expected_conflicts, + variables_to_inline, + word_token, + }) +} + +struct Interner<'a> { + grammar: &'a InputGrammar +} + +impl<'a> Interner<'a> { + fn intern_rule(&self, rule: &Rule) -> Result { + match rule { + Rule::Choice { elements } => { + let mut result = Vec::with_capacity(elements.len()); + for element in elements { + result.push(self.intern_rule(element)?); + } + Ok(Rule::Choice { elements: result }) + }, + + Rule::Seq { left, right } => + Ok(Rule::Seq { + left: Rc::new(self.intern_rule(left)?), + right: Rc::new(self.intern_rule(right)?), + }), + + Rule::Repeat(content) => + Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))), + + Rule::Metadata { rule, params } => + Ok(Rule::Metadata { + rule: Rc::new(self.intern_rule(rule)?), + params: params.clone() + }), + + Rule::NamedSymbol(name) => { + if let Some(symbol) = self.intern_name(&name) { + Ok(Rule::Symbol(symbol)) + } else { + Err(symbol_error(name)) + } + }, + + _ => Ok(rule.clone()) + + } + } + + fn intern_name(&self, symbol: &str) -> Option { + for (i, variable) in self.grammar.variables.iter().enumerate() { + if variable.name == symbol { + return Some(Symbol::non_terminal(i)) + } + } + + for (i, external_token) in self.grammar.external_tokens.iter().enumerate() { + if let Rule::NamedSymbol(name) = external_token { + if name == symbol { + return Some(Symbol::external(i)) + } + } + } + + return None + } +} + +fn symbol_error(name: &str) -> Error { + Error::SymbolError(format!("Undefined symbol '{}'", name)) +} + +fn variable_type_for_name(name: &str) -> VariableType { + if name.starts_with("_") { + VariableType::Hidden + } else { + VariableType::Named + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_repeat_expansion() { + let grammar = intern_symbols(&build_grammar(vec![ + InputVariable::named("x", Rule::choice(vec![ + Rule::named("y"), + Rule::named("_z"), + ])), + InputVariable::named("y", Rule::named("_z")), + InputVariable::named("_z", Rule::string("a")), + ])).unwrap(); + + assert_eq!(grammar.variables, vec![ + InputVariable::named("x", Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + ])), + InputVariable::named("y", Rule::non_terminal(2)), + InputVariable::hidden("_z", Rule::string("a")), + ]); + } + + #[test] + fn test_interning_external_token_names() { + // Variable `y` is both an internal and an external token. + // Variable `z` is just an external token. + let mut input_grammar = build_grammar(vec![ + InputVariable::named("w", Rule::choice(vec![ + Rule::named("x"), + Rule::named("y"), + Rule::named("z"), + ])), + InputVariable::named("x", Rule::string("a")), + InputVariable::named("y", Rule::string("b")), + ]); + input_grammar.external_tokens.extend(vec![ + Rule::named("y"), + Rule::named("z"), + ]); + + let grammar = intern_symbols(&input_grammar).unwrap(); + + // Variable `y` is referred to by its internal index. + // Variable `z` is referred to by its external index. + assert_eq!(grammar.variables, vec![ + InputVariable::named("w", Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + Rule::external(1), + ])), + InputVariable::named("x", Rule::string("a")), + InputVariable::named("y", Rule::string("b")), + ]); + + // The external token for `y` refers back to its internal index. + assert_eq!(grammar.external_tokens, vec![ + InputVariable::named("y", Rule::non_terminal(2)), + InputVariable::named("z", Rule::external(1)), + ]); + } + + #[test] + fn test_grammar_with_undefined_symbols() { + let result = intern_symbols(&build_grammar(vec![ + InputVariable::named("x", Rule::named("y")), + ])); + + match result { + Err(Error::SymbolError(message)) => assert_eq!(message, "Undefined symbol 'y'"), + _ => panic!("Expected an error but got none"), + } + } + + fn build_grammar(variables: Vec) -> InputGrammar { + InputGrammar { + variables, + name: "the_language".to_string(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + } + } +} diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs new file mode 100644 index 00000000..0788edca --- /dev/null +++ b/src/prepare_grammar/mod.rs @@ -0,0 +1,40 @@ +mod intern_symbols; +mod extract_tokens; +mod expand_repeats; +mod flatten_grammar; +mod normalize_rules; +mod extract_simple_aliases; + +use crate::rules::{AliasMap, Rule, Symbol}; +use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, InputVariable, ExternalToken}; +use crate::error::Result; +use self::intern_symbols::intern_symbols; +use self::extract_tokens::extract_tokens; +use self::expand_repeats::expand_repeats; +use self::flatten_grammar::flatten_grammar; +use self::normalize_rules::normalize_rules; +use self::extract_simple_aliases::extract_simple_aliases; + +pub(self) struct IntermediateGrammar { + variables: Vec, + extra_tokens: Vec, + expected_conflicts: Vec>, + external_tokens: Vec, + variables_to_inline: Vec, + word_token: Option, +} + +pub(self) type InternedGrammar = IntermediateGrammar; +pub(self) type ExtractedGrammar = IntermediateGrammar; + +pub fn prepare_grammar( + input_grammar: &InputGrammar +) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> { + let interned_grammar = intern_symbols(input_grammar)?; + let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; + let syntax_grammar = expand_repeats(syntax_grammar); + let mut syntax_grammar = flatten_grammar(syntax_grammar)?; + let mut lexical_grammar = normalize_rules(lexical_grammar); + let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar); + Ok((syntax_grammar, lexical_grammar, simple_aliases)) +} diff --git a/src/prepare_grammar/normalize_rules.rs b/src/prepare_grammar/normalize_rules.rs new file mode 100644 index 00000000..9e625ef5 --- /dev/null +++ b/src/prepare_grammar/normalize_rules.rs @@ -0,0 +1,5 @@ +use crate::grammars::LexicalGrammar; + +pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar { + unimplemented!(); +} diff --git a/src/render/mod.rs b/src/render/mod.rs new file mode 100644 index 00000000..85ce1f32 --- /dev/null +++ b/src/render/mod.rs @@ -0,0 +1,16 @@ +use crate::rules::{Symbol, AliasMap}; +use crate::grammars::{SyntaxGrammar, LexicalGrammar}; +use crate::tables::{ParseTable, LexTable}; + +pub fn render_c_code( + name: &str, + parse_table: ParseTable, + main_lex_table: LexTable, + keyword_lex_table: LexTable, + keyword_capture_token: Option, + syntax_grammar: SyntaxGrammar, + lexical_grammar: LexicalGrammar, + simple_aliases: AliasMap, +) -> String { + unimplemented!(); +} diff --git a/src/rules.rs b/src/rules.rs new file mode 100644 index 00000000..3cccca0d --- /dev/null +++ b/src/rules.rs @@ -0,0 +1,205 @@ +use std::rc::Rc; +use std::collections::HashMap; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum SymbolType { + External, + Terminal, + NonTerminal, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Associativity { + Left, + Right +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Alias { + value: String, + is_named: bool, +} + +pub type AliasMap = HashMap; + +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +pub struct MetadataParams { + precedence: Option, + dynamic_precedence: i32, + associativity: Option, + is_token: bool, + is_string: bool, + is_active: bool, + is_main_token: bool, + is_excluded: bool, + alias: Option, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Symbol { + kind: SymbolType, + index: usize, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Rule { + Blank, + CharacterSet(Vec), + String(String), + Pattern(String), + NamedSymbol(String), + Symbol(Symbol), + Choice { + elements: Vec, + }, + Metadata { + params: MetadataParams, + rule: Rc, + }, + Repeat(Rc), + Seq { + left: Rc, + right: Rc, + } +} + +impl Rule { + pub fn token(content: Rule) -> Self { + add_metadata(content, |params| { + params.is_token = true; + }) + } + + pub fn immediate_token(content: Rule) -> Self { + add_metadata(content, |params| { + params.is_token = true; + params.is_main_token = true; + }) + } + + pub fn prec(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.precedence = Some(value); + }) + } + + pub fn prec_left(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.associativity = Some(Associativity::Left); + params.precedence = Some(value); + }) + } + + pub fn prec_right(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.associativity = Some(Associativity::Right); + params.precedence = Some(value); + }) + } + + pub fn repeat(rule: Rule) -> Self { + Rule::Repeat(Rc::new(rule)) + } + + pub fn choice(rules: Vec) -> Self { + let mut elements = Vec::with_capacity(rules.len()); + for rule in rules { + choice_helper(&mut elements, rule); + } + Rule::Choice { elements } + } + + pub fn seq(rules: Vec) -> Self { + let mut result = Rule::Blank; + for rule in rules { + match rule { + Rule::Blank => continue, + Rule::Metadata { rule, params: _ } => { + if *rule == Rule::Blank { + continue; + } + }, + _ => { + if result == Rule::Blank { + result = rule; + } else { + result = Rule::Seq { + left: Rc::new(result), + right: Rc::new(rule), + } + } + } + } + } + result + } + + pub fn terminal(index: usize) -> Self { + Rule::Symbol(Symbol::terminal(index)) + } + + pub fn non_terminal(index: usize) -> Self { + Rule::Symbol(Symbol::non_terminal(index)) + } + + pub fn external(index: usize) -> Self { + Rule::Symbol(Symbol::external(index)) + } + + pub fn named(name: &'static str) -> Self { + Rule::NamedSymbol(name.to_string()) + } + + pub fn string(value: &'static str) -> Self { + Rule::String(value.to_string()) + } +} + +impl Symbol { + pub fn non_terminal(index: usize) -> Self { + Symbol { kind: SymbolType::NonTerminal, index } + } + + pub fn terminal(index: usize) -> Self { + Symbol { kind: SymbolType::Terminal, index } + } + + pub fn external(index: usize) -> Self { + Symbol { kind: SymbolType::External, index } + } +} + +impl From for Rule { + fn from(symbol: Symbol) -> Self { + Rule::Symbol(symbol) + } +} + +fn add_metadata(input: Rule, f: T) -> Rule { + match input { + Rule::Metadata { rule, mut params } => { + f(&mut params); + Rule::Metadata { rule, params } + }, + _ => { + let mut params = MetadataParams::default(); + f(&mut params); + Rule::Metadata { rule: Rc::new(input), params } + } + } +} + +fn choice_helper(result: &mut Vec, rule: Rule) { + match rule { + Rule::Choice {elements} => { + for element in elements { + choice_helper(result, element); + } + }, + _ => { + if !result.contains(&rule) { + result.push(rule); + } + } + } +} diff --git a/src/tables.rs b/src/tables.rs new file mode 100644 index 00000000..10b1e41d --- /dev/null +++ b/src/tables.rs @@ -0,0 +1,77 @@ +use std::collections::HashMap; +use std::ops::Range; +use crate::rules::{Associativity, Symbol, Alias}; + +pub type AliasSequenceId = usize; +pub type ParseStateId = usize; +pub type LexStateId = usize; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseActionType { + Error, + Shift, + Reduce, + Accept, + Recover, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseAction { + Accept, + Error, + Shift(ParseStateId), + ShiftExtra, + Recover, + Reduce { + symbol: Symbol, + child_count: usize, + precedence: i32, + dynamic_precedence: i32, + associativity: Option, + alias_sequence_id: Option, + is_repetition: bool, + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParseTableEntry { + actions: Vec, + reusable: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParseState { + terminal_entries: HashMap, + nonterminal_entries: HashMap +} + +#[derive(Debug, PartialEq, Eq)] +pub struct ParseTable { + states: Vec, + alias_sequences: Vec>, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AdvanceAction { + state: LexStateId, + precedence: Range, + in_main_token: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AcceptTokenAction { + symbol: Symbol, + precedence: i32, + implicit_precedence: i32, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LexState { + advance_actions: HashMap, + accept_action: Option, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct LexTable { + states: Vec, +}