From 6e4115548c1982a764ca22f819544455ca9f7807 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 10 Jul 2016 14:03:00 -0700 Subject: [PATCH 001/208] Initial commit --- .gitignore | 2 + .gitmodules | 3 + Cargo.toml | 15 ++ build.rs | 35 +++++ script/bindgen.sh | 16 +++ src/ffi.rs | 333 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 329 ++++++++++++++++++++++++++++++++++++++++++++ vendor/tree-sitter | 1 + 8 files changed, 734 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100755 script/bindgen.sh create mode 100644 src/ffi.rs create mode 100644 src/lib.rs create mode 160000 vendor/tree-sitter diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a9d37c56 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..eef86f94 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/tree-sitter"] + path = vendor/tree-sitter + url = https://github.com/tree-sitter/tree-sitter diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..0a93febe --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "tree-sitter" +version = "0.1.0" +authors = ["Max Brunsfeld "] +build = "build.rs" +exclude = ["vendor/tree-sitter/**/*"] +include = [ + "vendor/tree-sitter/src/runtime/*", + "vendor/tree-sitter/externals/utf8proc/utf8proc*" +] + +[dependencies] + +[build-dependencies] +cc = "1.0" diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..3427ed5f --- /dev/null +++ b/build.rs @@ -0,0 +1,35 @@ +extern crate cc; + +use std::path::Path; + + +fn main() { + let dir_path = Path::new("vendor/tree-sitter/src/runtime"); + + let source_filenames = [ + "get_changed_ranges.c", + "language.c", + "lexer.c", + "node.c", + "parser.c", + "parser.c", + "stack.c", + "subtree.c", + "tree_cursor.c", + "tree.c", + "utf16.c", + ]; + + let mut 
config = cc::Build::new(); + config.include("vendor/tree-sitter/src"); + config.include("vendor/tree-sitter/include"); + config.include("vendor/tree-sitter/externals/utf8proc"); + config.flag_if_supported("-Wno-unused-parameter"); + + for source_filename in source_filenames.iter() { + let source_path = dir_path.join(Path::new(&source_filename)); + config.file(&source_path.to_str().unwrap()); + } + + config.compile("libruntime.a") +} diff --git a/script/bindgen.sh b/script/bindgen.sh new file mode 100755 index 00000000..190e7a4f --- /dev/null +++ b/script/bindgen.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +output_path=src/ffi.rs +header_path='vendor/tree-sitter/include/tree_sitter/runtime.h' + +bindgen \ + --no-layout-tests \ + --whitelist-type '^TS.*' \ + --whitelist-function '^ts_.*' \ + --opaque-type FILE \ + $header_path > $output_path + +echo "" >> $output_path +version_constant='TREE_SITTER_LANGUAGE_VERSION' +version_number=$(egrep "#define $version_constant (.*)" $header_path | cut -d' ' -f3) +echo "pub const $version_constant: usize = $version_number;" >> $output_path diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 00000000..7d1c06e8 --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,333 @@ +/* automatically generated by rust-bindgen */ + +pub type FILE = [u64; 19usize]; +pub type TSSymbol = ::std::os::raw::c_ushort; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLanguage { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParser { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTree { + _unused: [u8; 0], +} +pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +pub type TSInputEncoding = u32; +pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub type TSSymbolType 
= u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSPoint { + pub row: u32, + pub column: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSRange { + pub start: TSPoint, + pub end: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInput { + pub payload: *mut ::std::os::raw::c_void, + pub read: ::std::option::Option< + unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) + -> *const ::std::os::raw::c_char, + >, + pub seek: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + byte_index: u32, + position: TSPoint, + ) -> ::std::os::raw::c_int, + >, + pub encoding: TSInputEncoding, +} +pub const TSLogType_TSLogTypeParse: TSLogType = 0; +pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub type TSLogType = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLogger { + pub payload: *mut ::std::os::raw::c_void, + pub log: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + arg1: TSLogType, + arg2: *const ::std::os::raw::c_char, + ), + >, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub new_end_byte: u32, + pub start_point: TSPoint, + pub old_end_point: TSPoint, + pub new_end_point: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSNode { + pub context: [u32; 4usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTreeCursor { + pub context: [u32; 2usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +extern "C" { + #[link_name = "\u{1}_ts_parser_new"] + pub fn ts_parser_new() -> *mut TSParser; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_delete"] + pub fn ts_parser_delete(arg1: *mut TSParser); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_language"] + pub fn 
ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_language"] + pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_logger"] + pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_logger"] + pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_print_dot_graphs"] + pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_halt_on_error"] + pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse"] + pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse_string"] + pub fn ts_parser_parse_string( + arg1: *mut TSParser, + arg2: *const TSTree, + arg3: *const ::std::os::raw::c_char, + arg4: u32, + ) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_copy"] + pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_delete"] + pub fn ts_tree_delete(arg1: *mut TSTree); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_root_node"] + pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_edit"] + pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_get_changed_ranges"] + pub fn ts_tree_get_changed_ranges( + arg1: *const TSTree, + arg2: *const TSTree, + arg3: *mut u32, + ) -> *mut TSRange; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_print_dot_graph"] + pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_byte"] + pub fn ts_node_start_byte(arg1: 
TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_point"] + pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_byte"] + pub fn ts_node_end_byte(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_point"] + pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_symbol"] + pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; +} +extern "C" { + #[link_name = "\u{1}_ts_node_type"] + pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_string"] + pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_eq"] + pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_null"] + pub fn ts_node_is_null(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_named"] + pub fn ts_node_is_named(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_missing"] + pub fn ts_node_is_missing(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_changes"] + pub fn ts_node_has_changes(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_error"] + pub fn ts_node_has_error(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_parent"] + pub fn ts_node_parent(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child"] + pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child"] + pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child_count"] + pub fn ts_node_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child_count"] + pub fn ts_node_named_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = 
"\u{1}_ts_node_next_sibling"] + pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_next_named_sibling"] + pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_sibling"] + pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_named_sibling"] + pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_child_for_byte"] + pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] + pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_byte_range"] + pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] + pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_point_range"] + pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) + -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] + pub fn ts_node_named_descendant_for_point_range( + arg1: TSNode, + arg2: TSPoint, + arg3: TSPoint, + ) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_new"] + pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_delete"] + pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] + pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] + pub fn 
ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] + pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] + pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_current_node"] + pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_count"] + pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_name"] + pub fn ts_language_symbol_name( + arg1: *const TSLanguage, + arg2: TSSymbol, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_type"] + pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; +} +extern "C" { + #[link_name = "\u{1}_ts_language_version"] + pub fn ts_language_version(arg1: *const TSLanguage) -> u32; +} + +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..ef11757a --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,329 @@ +mod ffi; + +use std::ffi::CStr; +use std::marker::PhantomData; +use std::os::raw::{c_char, c_int, c_void}; +use std::ptr; + +#[derive(Clone, Copy)] +pub struct Symbol(ffi::TSSymbol); + +#[derive(Clone, Copy)] +pub struct Language(*const ffi::TSLanguage); + +pub trait Utf16Input { + fn read(&self) -> &[u16]; + fn seek(&self, u32, Point); +} + +pub trait Utf8Input { + fn read(&self) -> &[u8]; + fn seek(&self, u32, Point); +} + +pub enum LogType { + Parse, + Lex, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Point { + pub row: u32, + pub column: u32, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct InputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub 
new_end_byte: u32, + pub start_position: Point, + pub old_end_position: Point, + pub new_end_position: Point, +} + +pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); + +pub struct Parser(*mut ffi::TSParser); + +pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); + +pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); + +impl Parser { + pub fn new() -> Parser { + unsafe { + let parser = ffi::ts_parser_new(); + Parser(parser) + } + } + + pub fn set_language(&mut self, language: Language) { + unsafe { + ffi::ts_parser_set_language(self.0, language.0); + } + } + + pub fn set_logger ()>(&mut self, logger: &mut F) { + unsafe extern "C" fn log ()>( + payload: *mut c_void, + c_log_type: ffi::TSLogType, + c_message: *const c_char, + ) { + let callback = (payload as *mut F).as_mut().unwrap(); + if let Ok(message) = CStr::from_ptr(c_message).to_str() { + let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + LogType::Parse + } else { + LogType::Lex + }; + callback(log_type, message); + } + }; + + let c_logger = ffi::TSLogger { + payload: logger as *mut F as *mut c_void, + log: Some(log::), + }; + + unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; + } + + pub fn parse_utf8( + &mut self, + input: &mut T, + old_tree: Option, + ) -> Option { + unsafe extern "C" fn read( + payload: *mut c_void, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let result = input.read(); + *bytes_read = result.len() as u32; + return result.as_ptr() as *const c_char; + }; + + unsafe extern "C" fn seek( + payload: *mut c_void, + byte: u32, + position: ffi::TSPoint, + ) -> c_int { + let input = (payload as *mut T).as_mut().unwrap(); + input.seek( + byte, + Point { + row: position.row, + column: position.column, + }, + ); + return 1; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + seek: Some(seek::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, 
+ }; + + let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + + let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; + if new_tree_ptr.is_null() { + None + } else { + Some(Tree(new_tree_ptr, ffi::TSInputEncoding_TSInputEncodingUTF8)) + } + } + + pub fn parse_utf16( + &mut self, + input: &mut T, + old_tree: Option, + ) -> Option { + unsafe extern "C" fn read( + payload: *mut c_void, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let result = input.read(); + *bytes_read = result.len() as u32 * 2; + return result.as_ptr() as *const c_char; + }; + + unsafe extern "C" fn seek( + payload: *mut c_void, + byte: u32, + position: ffi::TSPoint, + ) -> c_int { + let input = (payload as *mut T).as_mut().unwrap(); + input.seek( + byte / 2, + Point { + row: position.row, + column: position.column / 2, + }, + ); + return 1; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + seek: Some(seek::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + }; + + let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + + let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; + if new_tree_ptr.is_null() { + None + } else { + Some(Tree( + new_tree_ptr, + ffi::TSInputEncoding_TSInputEncodingUTF16, + )) + } + } +} + +impl Drop for Parser { + fn drop(&mut self) { + unsafe { ffi::ts_parser_delete(self.0) } + } +} + +impl Tree { + pub fn root_node(&self) -> Node { + Node::new(unsafe { ffi::ts_tree_root_node(self.0) }).unwrap() + } + + pub fn edit(&mut self, edit: &InputEdit) { + let edit = ffi::TSInputEdit { + start_byte: edit.start_byte, + old_end_byte: edit.old_end_byte, + new_end_byte: edit.new_end_byte, + start_point: edit.start_position.into(), + old_end_point: edit.old_end_position.into(), + new_end_point: edit.new_end_position.into(), + }; + unsafe { ffi::ts_tree_edit(self.0, &edit) }; + } + + pub fn 
walk(&self) -> TreeCursor { + TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) + } +} + +impl Drop for Tree { + fn drop(&mut self) { + unsafe { ffi::ts_tree_delete(self.0) } + } +} + +impl Clone for Tree { + fn clone(&self) -> Tree { + unsafe { Tree(ffi::ts_tree_copy(self.0), self.1) } + } +} + +impl<'a> Node<'a> { + fn new(node: ffi::TSNode) -> Option { + if node.id.is_null() { + None + } else { + Some(Node(node, PhantomData)) + } + } + + pub fn name(&self) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } + .to_str() + .unwrap() + } + + pub fn start_index(&self) -> u32 { + unsafe { ffi::ts_node_start_byte(self.0) } + } + + pub fn end_index(&self) -> u32 { + unsafe { ffi::ts_node_end_byte(self.0) } + } + + pub fn start_position(&self) -> Point { + let result = unsafe { ffi::ts_node_start_point(self.0) }; + Point { + row: result.row, + column: result.column, + } + } + + pub fn end_position(&self) -> Point { + let result = unsafe { ffi::ts_node_end_point(self.0) }; + Point { + row: result.row, + column: result.column, + } + } + + pub fn child(&self, i: u32) -> Option { + Self::new(unsafe { ffi::ts_node_child(self.0, i) }) + } + + pub fn parent(&self) -> Option { + Self::new(unsafe { ffi::ts_node_parent(self.0) }) + } +} + +impl<'a> TreeCursor<'a> { + fn node(&'a self) -> Node<'a> { + Node( + unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, + PhantomData, + ) + } + + fn goto_first_child(&mut self) -> bool { + return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; + } + + fn goto_parent(&mut self) -> bool { + return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; + } + + fn goto_next_sibling(&mut self) -> bool { + return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; + } + + fn goto_first_child_for_index(&mut self, index: u32) -> Option { + let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) }; + if result < 0 { + None + } else { + Some(result as 
u32) + } + } +} + +impl<'a> Drop for TreeCursor<'a> { + fn drop(&mut self) { + unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } + } +} + +impl Into for Point { + fn into(self) -> ffi::TSPoint { + ffi::TSPoint { + row: self.row, + column: self.column, + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() {} +} diff --git a/vendor/tree-sitter b/vendor/tree-sitter new file mode 160000 index 00000000..5ec3769c --- /dev/null +++ b/vendor/tree-sitter @@ -0,0 +1 @@ +Subproject commit 5ec3769cb4c9acfda64f80d7c14abce939e8b4c5 From 8918d1a5b14f9a54ef23dcb4b29d8bf2bccd6384 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 14:35:31 -0700 Subject: [PATCH 002/208] Add boilerplate --- .travis.yml | 8 ++++++++ LICENSE | 21 +++++++++++++++++++++ README.md | 8 ++++++++ 3 files changed, 37 insertions(+) create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.md diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..32e3a71f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,8 @@ +language: rust + +rust: + - stable + +branches: + only: + - master diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..971b81f9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..08df0e4e --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +Rust Tree-sitter +=========================== + +[![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) + +Rust bindings to the [Tree-sitter][] parsing library. + +[tree-sitter]: https://github.com/tree-sitter/tree-sitter From f07f710db7633dc26d86163972512799ae407540 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 14:40:31 -0700 Subject: [PATCH 003/208] Compile tree-sitter sources in c99 mode --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 3427ed5f..53265655 100644 --- a/build.rs +++ b/build.rs @@ -24,6 +24,7 @@ fn main() { config.include("vendor/tree-sitter/src"); config.include("vendor/tree-sitter/include"); config.include("vendor/tree-sitter/externals/utf8proc"); + config.flag_if_supported("-std=c99"); config.flag_if_supported("-Wno-unused-parameter"); for source_filename in source_filenames.iter() { From ead0e312624a4e20a312875c073be4dc51a2f29b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 14:43:30 -0700 Subject: [PATCH 004/208] Fix duplicated compile of parser.c --- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index 53265655..ad62f3a1 100644 --- a/build.rs +++ b/build.rs @@ -12,7 +12,6 @@ fn main() { "lexer.c", "node.c", "parser.c", - "parser.c", "stack.c", "subtree.c", "tree_cursor.c", 
From 08217fff8dfc7a80b2348679144ff44344d63008 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 17:16:35 -0700 Subject: [PATCH 005/208] Get basic parsing working, add some unit tests --- .gitignore | 1 + .travis.yml | 6 ++ Cargo.toml | 2 - build.rs | 31 ++++++---- fixtures/.gitkeep | 0 script/fetch-test-fixtures.sh | 14 +++++ src/lib.rs | 113 +++++++++++++++++++++++++++++----- 7 files changed, 138 insertions(+), 29 deletions(-) create mode 100644 fixtures/.gitkeep create mode 100755 script/fetch-test-fixtures.sh diff --git a/.gitignore b/.gitignore index a9d37c56..fbd4fda0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +fixtures/tree-sitter-rust diff --git a/.travis.yml b/.travis.yml index 32e3a71f..10fcfe94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,12 @@ language: rust rust: - stable +env: + - RUST_TREE_SITTER_TEST=1 + +before_install: + - ./script/fetch-test-fixtures.sh + branches: only: - master diff --git a/Cargo.toml b/Cargo.toml index 0a93febe..e20d40aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,5 @@ include = [ "vendor/tree-sitter/externals/utf8proc/utf8proc*" ] -[dependencies] - [build-dependencies] cc = "1.0" diff --git a/build.rs b/build.rs index ad62f3a1..fa8b41ea 100644 --- a/build.rs +++ b/build.rs @@ -1,10 +1,17 @@ extern crate cc; +use std::env; use std::path::Path; - fn main() { - let dir_path = Path::new("vendor/tree-sitter/src/runtime"); + let root_path = Path::new("vendor/tree-sitter"); + + let mut config = cc::Build::new(); + config.flag_if_supported("-std=c99"); + config.flag_if_supported("-Wno-unused-parameter"); + config.include(root_path.join(Path::new("src"))); + config.include(root_path.join(Path::new("include"))); + config.include(root_path.join(Path::new("externals/utf8proc"))); let source_filenames = [ "get_changed_ranges.c", @@ -19,16 +26,18 @@ fn main() { "utf16.c", ]; - let mut config = cc::Build::new(); - config.include("vendor/tree-sitter/src"); - 
config.include("vendor/tree-sitter/include"); - config.include("vendor/tree-sitter/externals/utf8proc"); - config.flag_if_supported("-std=c99"); - config.flag_if_supported("-Wno-unused-parameter"); + config.files(source_filenames.iter().map(|source_filename| { + root_path + .join(Path::new(&"src/runtime")) + .join(Path::new(&source_filename)) + })); - for source_filename in source_filenames.iter() { - let source_path = dir_path.join(Path::new(&source_filename)); - config.file(&source_path.to_str().unwrap()); + config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c"))); + + if env::var("RUST_TREE_SITTER_TEST").is_ok() { + let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + config.file(parser_dir.join("parser.c")); + config.file(parser_dir.join("scanner.c")); } config.compile("libruntime.a") diff --git a/fixtures/.gitkeep b/fixtures/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/script/fetch-test-fixtures.sh b/script/fetch-test-fixtures.sh new file mode 100755 index 00000000..24cc316a --- /dev/null +++ b/script/fetch-test-fixtures.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +grammar_dir='fixtures/tree-sitter-rust' +grammar_url='https://github.com/tree-sitter/tree-sitter-rust' + +if [ ! 
-d $grammar_dir ]; then + git clone $grammar_url $grammar_dir --depth=1 +fi + +( + cd $grammar_dir; + git fetch origin master --depth=1 + git reset --hard origin/master; +) diff --git a/src/lib.rs b/src/lib.rs index ef11757a..fa1db0f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,19 +8,19 @@ use std::ptr; #[derive(Clone, Copy)] pub struct Symbol(ffi::TSSymbol); -#[derive(Clone, Copy)] -pub struct Language(*const ffi::TSLanguage); +pub type Language = *const ffi::TSLanguage; pub trait Utf16Input { - fn read(&self) -> &[u16]; - fn seek(&self, u32, Point); + fn read(&mut self) -> &[u16]; + fn seek(&mut self, u32, Point); } pub trait Utf8Input { - fn read(&self) -> &[u8]; - fn seek(&self, u32, Point); + fn read(&mut self) -> &[u8]; + fn seek(&mut self, u32, Point); } +#[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, Lex, @@ -50,6 +50,11 @@ pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +struct FlatInput<'a> { + bytes: &'a [u8], + offset: usize, +} + impl Parser { pub fn new() -> Parser { unsafe { @@ -60,11 +65,11 @@ impl Parser { pub fn set_language(&mut self, language: Language) { unsafe { - ffi::ts_parser_set_language(self.0, language.0); + ffi::ts_parser_set_language(self.0, language); } } - pub fn set_logger ()>(&mut self, logger: &mut F) { + pub fn set_logger ()>(&mut self, logger: Option<&mut F>) { unsafe extern "C" fn log ()>( payload: *mut c_void, c_log_type: ffi::TSLogType, @@ -81,14 +86,24 @@ impl Parser { } }; - let c_logger = ffi::TSLogger { - payload: logger as *mut F as *mut c_void, - log: Some(log::), - }; + let c_logger; + if let Some(logger) = logger { + c_logger = ffi::TSLogger { + payload: logger as *mut F as *mut c_void, + log: Some(log::), + }; + } else { + c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; + } unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; } + pub fn parse_str(&mut self, input: &str, old_tree: Option) -> Option { + let 
mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; + self.parse_utf8(&mut input, old_tree) + } + pub fn parse_utf8( &mut self, input: &mut T, @@ -239,9 +254,7 @@ impl<'a> Node<'a> { } pub fn name(&self) -> &'static str { - unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } - .to_str() - .unwrap() + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } pub fn start_index(&self) -> u32 { @@ -272,11 +285,24 @@ impl<'a> Node<'a> { Self::new(unsafe { ffi::ts_node_child(self.0, i) }) } + pub fn child_count(&self) -> u32 { + unsafe { ffi::ts_node_child_count(self.0) } + } + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } + + pub fn to_sexp(&self) -> String { + let c_string = unsafe { ffi::ts_node_string(self.0) }; + let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); + unsafe { free(c_string as *mut c_void) }; + result + } } +extern "C" { fn free(pointer: *mut c_void); } + impl<'a> TreeCursor<'a> { fn node(&'a self) -> Node<'a> { Node( @@ -322,8 +348,63 @@ impl Into for Point { } } +impl<'a> Utf8Input for FlatInput<'a> { + fn read(&mut self) -> &[u8] { + let result = &self.bytes[self.offset..]; + self.offset = self.bytes.len(); + result + } + + fn seek(&mut self, offset: u32, _position: Point) { + self.offset = offset as usize; + } +} + #[cfg(test)] mod tests { + use super::*; + + fn rust() -> Language { unsafe { tree_sitter_rust() } } + extern "C" { fn tree_sitter_rust() -> Language; } + #[test] - fn it_works() {} + fn test_basic_parsing() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let tree = parser.parse_str(" + struct Stuff {} + fn main() {} + ", None).unwrap(); + + let root_node = tree.root_node(); + assert_eq!(root_node.name(), "source_file"); + + assert_eq!( + root_node.to_sexp(), + "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + ); + + let struct_node = 
root_node.child(0).unwrap(); + assert_eq!(struct_node.name(), "struct_item"); + } + + #[test] + fn test_logging() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let mut messages = Vec::new(); + parser.set_logger(Some(&mut |log_type, message| { + messages.push((log_type, message.to_string())); + })); + + parser.parse_str(" + struct Stuff {} + fn main() {} + ", None).unwrap(); + + assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); + assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); + } } From 7e6675d56effa6177eaf387b13942c8219107ae1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 17:23:35 -0700 Subject: [PATCH 006/208] Use a more unique library name when building C sources --- build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.rs b/build.rs index fa8b41ea..4e2c3b8f 100644 --- a/build.rs +++ b/build.rs @@ -40,5 +40,5 @@ fn main() { config.file(parser_dir.join("scanner.c")); } - config.compile("libruntime.a") + config.compile("treesitter") } From 572a60183c86920b0c1bc83941d70b3772534e3a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 17:29:23 -0700 Subject: [PATCH 007/208] Suppress warnings associated w/ generated bindings --- build.rs | 1 + script/bindgen.sh | 2 +- src/bindings.rs | 333 +++++++++++++++++++++++++++++++++++++++++++++ src/ffi.rs | 335 +--------------------------------------------- 4 files changed, 338 insertions(+), 333 deletions(-) create mode 100644 src/bindings.rs diff --git a/build.rs b/build.rs index 4e2c3b8f..5fa5d408 100644 --- a/build.rs +++ b/build.rs @@ -36,6 +36,7 @@ fn main() { if env::var("RUST_TREE_SITTER_TEST").is_ok() { let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + config.flag_if_supported("-Wno-typedef-redefinition"); config.file(parser_dir.join("parser.c")); config.file(parser_dir.join("scanner.c")); } diff --git a/script/bindgen.sh 
b/script/bindgen.sh index 190e7a4f..1b9008b2 100755 --- a/script/bindgen.sh +++ b/script/bindgen.sh @@ -1,6 +1,6 @@ #!/bin/bash -output_path=src/ffi.rs +output_path=src/bindings.rs header_path='vendor/tree-sitter/include/tree_sitter/runtime.h' bindgen \ diff --git a/src/bindings.rs b/src/bindings.rs new file mode 100644 index 00000000..7d1c06e8 --- /dev/null +++ b/src/bindings.rs @@ -0,0 +1,333 @@ +/* automatically generated by rust-bindgen */ + +pub type FILE = [u64; 19usize]; +pub type TSSymbol = ::std::os::raw::c_ushort; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLanguage { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParser { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTree { + _unused: [u8; 0], +} +pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +pub type TSInputEncoding = u32; +pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub type TSSymbolType = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSPoint { + pub row: u32, + pub column: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSRange { + pub start: TSPoint, + pub end: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInput { + pub payload: *mut ::std::os::raw::c_void, + pub read: ::std::option::Option< + unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) + -> *const ::std::os::raw::c_char, + >, + pub seek: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + byte_index: u32, + position: TSPoint, + ) -> ::std::os::raw::c_int, + >, + pub encoding: TSInputEncoding, +} +pub const TSLogType_TSLogTypeParse: TSLogType = 0; +pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub type 
TSLogType = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLogger { + pub payload: *mut ::std::os::raw::c_void, + pub log: ::std::option::Option< + unsafe extern "C" fn( + payload: *mut ::std::os::raw::c_void, + arg1: TSLogType, + arg2: *const ::std::os::raw::c_char, + ), + >, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSInputEdit { + pub start_byte: u32, + pub old_end_byte: u32, + pub new_end_byte: u32, + pub start_point: TSPoint, + pub old_end_point: TSPoint, + pub new_end_point: TSPoint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSNode { + pub context: [u32; 4usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSTreeCursor { + pub context: [u32; 2usize], + pub id: *const ::std::os::raw::c_void, + pub tree: *const ::std::os::raw::c_void, +} +extern "C" { + #[link_name = "\u{1}_ts_parser_new"] + pub fn ts_parser_new() -> *mut TSParser; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_delete"] + pub fn ts_parser_delete(arg1: *mut TSParser); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_language"] + pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_language"] + pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_logger"] + pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_set_logger"] + pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_print_dot_graphs"] + pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_halt_on_error"] + pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse"] + pub fn 
ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_parser_parse_string"] + pub fn ts_parser_parse_string( + arg1: *mut TSParser, + arg2: *const TSTree, + arg3: *const ::std::os::raw::c_char, + arg4: u32, + ) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_copy"] + pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_delete"] + pub fn ts_tree_delete(arg1: *mut TSTree); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_root_node"] + pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_edit"] + pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_get_changed_ranges"] + pub fn ts_tree_get_changed_ranges( + arg1: *const TSTree, + arg2: *const TSTree, + arg3: *mut u32, + ) -> *mut TSRange; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_print_dot_graph"] + pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_byte"] + pub fn ts_node_start_byte(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_start_point"] + pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_byte"] + pub fn ts_node_end_byte(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_end_point"] + pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; +} +extern "C" { + #[link_name = "\u{1}_ts_node_symbol"] + pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; +} +extern "C" { + #[link_name = "\u{1}_ts_node_type"] + pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_string"] + pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_node_eq"] + pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) 
-> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_null"] + pub fn ts_node_is_null(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_named"] + pub fn ts_node_is_named(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_is_missing"] + pub fn ts_node_is_missing(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_changes"] + pub fn ts_node_has_changes(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_has_error"] + pub fn ts_node_has_error(arg1: TSNode) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_node_parent"] + pub fn ts_node_parent(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child"] + pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child"] + pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_child_count"] + pub fn ts_node_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_child_count"] + pub fn ts_node_named_child_count(arg1: TSNode) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_node_next_sibling"] + pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_next_named_sibling"] + pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_sibling"] + pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_prev_named_sibling"] + pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_child_for_byte"] + pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] + pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; +} +extern "C" { + #[link_name = 
"\u{1}_ts_node_descendant_for_byte_range"] + pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] + pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_descendant_for_point_range"] + pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) + -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] + pub fn ts_node_named_descendant_for_point_range( + arg1: TSNode, + arg2: TSPoint, + arg3: TSPoint, + ) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_new"] + pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_delete"] + pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] + pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] + pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] + pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] + pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[link_name = "\u{1}_ts_tree_cursor_current_node"] + pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_count"] + pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_name"] + pub fn ts_language_symbol_name( + arg1: *const TSLanguage, + arg2: TSSymbol, + ) -> *const 
::std::os::raw::c_char; +} +extern "C" { + #[link_name = "\u{1}_ts_language_symbol_type"] + pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; +} +extern "C" { + #[link_name = "\u{1}_ts_language_version"] + pub fn ts_language_version(arg1: *const TSLanguage) -> u32; +} + +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; diff --git a/src/ffi.rs b/src/ffi.rs index 7d1c06e8..323609e0 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -1,333 +1,4 @@ -/* automatically generated by rust-bindgen */ +#![allow(dead_code)] +#![allow(non_upper_case_globals)] -pub type FILE = [u64; 19usize]; -pub type TSSymbol = ::std::os::raw::c_ushort; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSLanguage { - _unused: [u8; 0], -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSParser { - _unused: [u8; 0], -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSTree { - _unused: [u8; 0], -} -pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; -pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; -pub type TSInputEncoding = u32; -pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; -pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; -pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; -pub type TSSymbolType = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSPoint { - pub row: u32, - pub column: u32, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSRange { - pub start: TSPoint, - pub end: TSPoint, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSInput { - pub payload: *mut ::std::os::raw::c_void, - pub read: ::std::option::Option< - unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) - -> *const ::std::os::raw::c_char, - >, - pub seek: ::std::option::Option< - unsafe extern "C" fn( - payload: *mut ::std::os::raw::c_void, - byte_index: u32, - position: TSPoint, - ) -> ::std::os::raw::c_int, - >, - pub encoding: 
TSInputEncoding, -} -pub const TSLogType_TSLogTypeParse: TSLogType = 0; -pub const TSLogType_TSLogTypeLex: TSLogType = 1; -pub type TSLogType = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSLogger { - pub payload: *mut ::std::os::raw::c_void, - pub log: ::std::option::Option< - unsafe extern "C" fn( - payload: *mut ::std::os::raw::c_void, - arg1: TSLogType, - arg2: *const ::std::os::raw::c_char, - ), - >, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSInputEdit { - pub start_byte: u32, - pub old_end_byte: u32, - pub new_end_byte: u32, - pub start_point: TSPoint, - pub old_end_point: TSPoint, - pub new_end_point: TSPoint, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSNode { - pub context: [u32; 4usize], - pub id: *const ::std::os::raw::c_void, - pub tree: *const ::std::os::raw::c_void, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct TSTreeCursor { - pub context: [u32; 2usize], - pub id: *const ::std::os::raw::c_void, - pub tree: *const ::std::os::raw::c_void, -} -extern "C" { - #[link_name = "\u{1}_ts_parser_new"] - pub fn ts_parser_new() -> *mut TSParser; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_delete"] - pub fn ts_parser_delete(arg1: *mut TSParser); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_language"] - pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_set_language"] - pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_logger"] - pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_set_logger"] - pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_print_dot_graphs"] - pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_halt_on_error"] - pub fn 
ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); -} -extern "C" { - #[link_name = "\u{1}_ts_parser_parse"] - pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; -} -extern "C" { - #[link_name = "\u{1}_ts_parser_parse_string"] - pub fn ts_parser_parse_string( - arg1: *mut TSParser, - arg2: *const TSTree, - arg3: *const ::std::os::raw::c_char, - arg4: u32, - ) -> *mut TSTree; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_copy"] - pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_delete"] - pub fn ts_tree_delete(arg1: *mut TSTree); -} -extern "C" { - #[link_name = "\u{1}_ts_tree_root_node"] - pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_edit"] - pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); -} -extern "C" { - #[link_name = "\u{1}_ts_tree_get_changed_ranges"] - pub fn ts_tree_get_changed_ranges( - arg1: *const TSTree, - arg2: *const TSTree, - arg3: *mut u32, - ) -> *mut TSRange; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_print_dot_graph"] - pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); -} -extern "C" { - #[link_name = "\u{1}_ts_node_start_byte"] - pub fn ts_node_start_byte(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_start_point"] - pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; -} -extern "C" { - #[link_name = "\u{1}_ts_node_end_byte"] - pub fn ts_node_end_byte(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_end_point"] - pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; -} -extern "C" { - #[link_name = "\u{1}_ts_node_symbol"] - pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; -} -extern "C" { - #[link_name = "\u{1}_ts_node_type"] - pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; -} -extern "C" { - #[link_name = "\u{1}_ts_node_string"] - pub fn ts_node_string(arg1: TSNode) -> *mut 
::std::os::raw::c_char; -} -extern "C" { - #[link_name = "\u{1}_ts_node_eq"] - pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_is_null"] - pub fn ts_node_is_null(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_is_named"] - pub fn ts_node_is_named(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_is_missing"] - pub fn ts_node_is_missing(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_has_changes"] - pub fn ts_node_has_changes(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_has_error"] - pub fn ts_node_has_error(arg1: TSNode) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_node_parent"] - pub fn ts_node_parent(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_child"] - pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_child"] - pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_child_count"] - pub fn ts_node_child_count(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_child_count"] - pub fn ts_node_named_child_count(arg1: TSNode) -> u32; -} -extern "C" { - #[link_name = "\u{1}_ts_node_next_sibling"] - pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_next_named_sibling"] - pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_prev_sibling"] - pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_prev_named_sibling"] - pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_first_child_for_byte"] - pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] - pub fn 
ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_byte_range"] - pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] - pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_point_range"] - pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) - -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] - pub fn ts_node_named_descendant_for_point_range( - arg1: TSNode, - arg2: TSPoint, - arg3: TSPoint, - ) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_new"] - pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_delete"] - pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] - pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] - pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] - pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] - pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; -} -extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_current_node"] - pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; -} -extern "C" { - #[link_name = "\u{1}_ts_language_symbol_count"] - pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; -} -extern "C" { - #[link_name = 
"\u{1}_ts_language_symbol_name"] - pub fn ts_language_symbol_name( - arg1: *const TSLanguage, - arg2: TSSymbol, - ) -> *const ::std::os::raw::c_char; -} -extern "C" { - #[link_name = "\u{1}_ts_language_symbol_type"] - pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; -} -extern "C" { - #[link_name = "\u{1}_ts_language_version"] - pub fn ts_language_version(arg1: *const TSLanguage) -> u32; -} - -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; +include!("./bindings.rs"); From b1ff399960cb4a72fe9a4323ecfc9b633c35e545 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 18:02:01 -0700 Subject: [PATCH 008/208] :arrow_up: tree-sitter for warning fixes --- build.rs | 21 +++++++++++---------- vendor/tree-sitter | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/build.rs b/build.rs index 5fa5d408..8736b645 100644 --- a/build.rs +++ b/build.rs @@ -4,14 +4,15 @@ use std::env; use std::path::Path; fn main() { + let mut config = cc::Build::new(); let root_path = Path::new("vendor/tree-sitter"); - let mut config = cc::Build::new(); - config.flag_if_supported("-std=c99"); - config.flag_if_supported("-Wno-unused-parameter"); - config.include(root_path.join(Path::new("src"))); - config.include(root_path.join(Path::new("include"))); - config.include(root_path.join(Path::new("externals/utf8proc"))); + config + .flag("-std=c99") + .flag("-Wno-unused-parameter") + .include(root_path.join(Path::new("src"))) + .include(root_path.join(Path::new("include"))) + .include(root_path.join(Path::new("externals/utf8proc"))); let source_filenames = [ "get_changed_ranges.c", @@ -36,10 +37,10 @@ fn main() { if env::var("RUST_TREE_SITTER_TEST").is_ok() { let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); - config.flag_if_supported("-Wno-typedef-redefinition"); - config.file(parser_dir.join("parser.c")); - config.file(parser_dir.join("scanner.c")); + config + .file(parser_dir.join("parser.c")) + 
.file(parser_dir.join("scanner.c")); } - config.compile("treesitter") + config.compile("treesitter_ffi"); } diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 5ec3769c..3c01382b 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 5ec3769cb4c9acfda64f80d7c14abce939e8b4c5 +Subproject commit 3c01382b95364ce40f0cf9856865a30af77f9690 From 13e26b5007b19f2f98584adf594b478f2cbb9175 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 18:08:44 -0700 Subject: [PATCH 009/208] Try a static flag --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 8736b645..b7433f54 100644 --- a/build.rs +++ b/build.rs @@ -10,6 +10,7 @@ fn main() { config .flag("-std=c99") .flag("-Wno-unused-parameter") + .static_flag(true) .include(root_path.join(Path::new("src"))) .include(root_path.join(Path::new("include"))) .include(root_path.join(Path::new("externals/utf8proc"))); From 29dfa0550413cecb9f2fb13798e60f95522bb0ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 May 2018 19:40:06 -0700 Subject: [PATCH 010/208] Try clang --- .travis.yml | 12 +++++++++++- build.rs | 1 - 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 10fcfe94..5b99d596 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,21 @@ rust: - stable env: - - RUST_TREE_SITTER_TEST=1 + - CC=clang-3.6 RUST_TREE_SITTER_TEST=1 before_install: - ./script/fetch-test-fixtures.sh +compiler: clang-3.6 + +addons: + apt: + sources: + - llvm-toolchain-precise-3.6 + - ubuntu-toolchain-r-test + packages: + - clang-3.6 + branches: only: - master diff --git a/build.rs b/build.rs index b7433f54..8736b645 100644 --- a/build.rs +++ b/build.rs @@ -10,7 +10,6 @@ fn main() { config .flag("-std=c99") .flag("-Wno-unused-parameter") - .static_flag(true) .include(root_path.join(Path::new("src"))) .include(root_path.join(Path::new("include"))) .include(root_path.join(Path::new("externals/utf8proc"))); From 
e61edf539824631b4e59a8d8ed022f7a065cf95a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 09:30:00 -0700 Subject: [PATCH 011/208] Don't perform platform-specific name mangling on C functions for bindings --- script/bindgen.sh | 1 + src/bindings.rs | 57 +---------------------------------------------- 2 files changed, 2 insertions(+), 56 deletions(-) diff --git a/script/bindgen.sh b/script/bindgen.sh index 1b9008b2..699f0339 100755 --- a/script/bindgen.sh +++ b/script/bindgen.sh @@ -8,6 +8,7 @@ bindgen \ --whitelist-type '^TS.*' \ --whitelist-function '^ts_.*' \ --opaque-type FILE \ + --distrust-clang-mangling \ $header_path > $output_path echo "" >> $output_path diff --git a/src/bindings.rs b/src/bindings.rs index 7d1c06e8..1ab49bde 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -1,7 +1,7 @@ /* automatically generated by rust-bindgen */ pub type FILE = [u64; 19usize]; -pub type TSSymbol = ::std::os::raw::c_ushort; +pub type TSSymbol = u16; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLanguage { @@ -93,43 +93,33 @@ pub struct TSTreeCursor { pub tree: *const ::std::os::raw::c_void, } extern "C" { - #[link_name = "\u{1}_ts_parser_new"] pub fn ts_parser_new() -> *mut TSParser; } extern "C" { - #[link_name = "\u{1}_ts_parser_delete"] pub fn ts_parser_delete(arg1: *mut TSParser); } extern "C" { - #[link_name = "\u{1}_ts_parser_language"] pub fn ts_parser_language(arg1: *const TSParser) -> *const TSLanguage; } extern "C" { - #[link_name = "\u{1}_ts_parser_set_language"] pub fn ts_parser_set_language(arg1: *mut TSParser, arg2: *const TSLanguage) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_parser_logger"] pub fn ts_parser_logger(arg1: *const TSParser) -> TSLogger; } extern "C" { - #[link_name = "\u{1}_ts_parser_set_logger"] pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); } extern "C" { - #[link_name = "\u{1}_ts_parser_print_dot_graphs"] pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); } extern 
"C" { - #[link_name = "\u{1}_ts_parser_halt_on_error"] pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); } extern "C" { - #[link_name = "\u{1}_ts_parser_parse"] pub fn ts_parser_parse(arg1: *mut TSParser, arg2: *const TSTree, arg3: TSInput) -> *mut TSTree; } extern "C" { - #[link_name = "\u{1}_ts_parser_parse_string"] pub fn ts_parser_parse_string( arg1: *mut TSParser, arg2: *const TSTree, @@ -138,23 +128,18 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[link_name = "\u{1}_ts_tree_copy"] pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; } extern "C" { - #[link_name = "\u{1}_ts_tree_delete"] pub fn ts_tree_delete(arg1: *mut TSTree); } extern "C" { - #[link_name = "\u{1}_ts_tree_root_node"] pub fn ts_tree_root_node(arg1: *const TSTree) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_tree_edit"] pub fn ts_tree_edit(arg1: *mut TSTree, arg2: *const TSInputEdit); } extern "C" { - #[link_name = "\u{1}_ts_tree_get_changed_ranges"] pub fn ts_tree_get_changed_ranges( arg1: *const TSTree, arg2: *const TSTree, @@ -162,120 +147,91 @@ extern "C" { ) -> *mut TSRange; } extern "C" { - #[link_name = "\u{1}_ts_tree_print_dot_graph"] pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); } extern "C" { - #[link_name = "\u{1}_ts_node_start_byte"] pub fn ts_node_start_byte(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_start_point"] pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; } extern "C" { - #[link_name = "\u{1}_ts_node_end_byte"] pub fn ts_node_end_byte(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_end_point"] pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; } extern "C" { - #[link_name = "\u{1}_ts_node_symbol"] pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; } extern "C" { - #[link_name = "\u{1}_ts_node_type"] pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { - #[link_name = "\u{1}_ts_node_string"] pub fn ts_node_string(arg1: TSNode) -> *mut 
::std::os::raw::c_char; } extern "C" { - #[link_name = "\u{1}_ts_node_eq"] pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_is_null"] pub fn ts_node_is_null(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_is_named"] pub fn ts_node_is_named(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_is_missing"] pub fn ts_node_is_missing(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_has_changes"] pub fn ts_node_has_changes(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_has_error"] pub fn ts_node_has_error(arg1: TSNode) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_node_parent"] pub fn ts_node_parent(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_child"] pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_named_child"] pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_child_count"] pub fn ts_node_child_count(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_named_child_count"] pub fn ts_node_named_child_count(arg1: TSNode) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_node_next_sibling"] pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_next_named_sibling"] pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_prev_sibling"] pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_prev_named_sibling"] pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_first_child_for_byte"] pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_first_named_child_for_byte"] pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; 
} extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_byte_range"] pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_byte_range"] pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_descendant_for_point_range"] pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_node_named_descendant_for_point_range"] pub fn ts_node_named_descendant_for_point_range( arg1: TSNode, arg2: TSPoint, @@ -283,50 +239,39 @@ extern "C" { ) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_new"] pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_delete"] pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child"] pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_first_child_for_byte"] pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_next_sibling"] pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_goto_parent"] pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - #[link_name = "\u{1}_ts_tree_cursor_current_node"] pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } extern "C" { - #[link_name = "\u{1}_ts_language_symbol_count"] pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; } extern "C" { - #[link_name = "\u{1}_ts_language_symbol_name"] pub fn ts_language_symbol_name( arg1: *const TSLanguage, arg2: TSSymbol, ) -> *const ::std::os::raw::c_char; 
} extern "C" { - #[link_name = "\u{1}_ts_language_symbol_type"] pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } extern "C" { - #[link_name = "\u{1}_ts_language_version"] pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } From 29c0cd3aa4d9e569c0ea2d1b4ea2652e207ca51a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 09:48:24 -0700 Subject: [PATCH 012/208] Add appveyor config --- README.md | 1 + appveyor.yml | 24 ++++++++++++++++++++++++ script/fetch-test-fixtures.cmd | 16 ++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 appveyor.yml create mode 100755 script/fetch-test-fixtures.cmd diff --git a/README.md b/README.md index 08df0e4e..40f5624f 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ Rust Tree-sitter =========================== [![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) +[![Build status](https://ci.appveyor.com/api/projects/status/d0f6vqq3rflxx3y6/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/rust-tree-sitter/branch/master) Rust bindings to the [Tree-sitter][] parsing library. 
diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..23fe3d97 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,24 @@ +environment: + RUST_TREE_SITTER_TEST: true + +build: false + +install: + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init -yv --default-toolchain stable + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - rustc -vV + - cargo -vV + - script\fetch-test-fixtures.cmd + +test_script: + - cargo build + - cargo test + +branches: + only: + - master + +cache: + - fixtures + - C:\Users\appveyor\.cargo diff --git a/script/fetch-test-fixtures.cmd b/script/fetch-test-fixtures.cmd new file mode 100755 index 00000000..33543961 --- /dev/null +++ b/script/fetch-test-fixtures.cmd @@ -0,0 +1,16 @@ +@Echo off +SETLOCAL + +Set grammar_dir=fixtures\tree-sitter-rust +Set grammar_url=https://github.com/tree-sitter/tree-sitter-rust + +@IF NOT EXIST %grammar_dir% ( + git clone %grammar_url% %grammar_dir% --depth=1 +) + +pushd %grammar_dir% +git fetch origin master --depth=1 +git reset --hard origin/master +popd + +ENDLOCAL From 8d485857e10d90f76c344811a2da645ddfb74bd2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:01:37 -0700 Subject: [PATCH 013/208] Tweak build script for windows --- build.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/build.rs b/build.rs index 8736b645..c1e768ff 100644 --- a/build.rs +++ b/build.rs @@ -1,18 +1,18 @@ extern crate cc; use std::env; -use std::path::Path; +use std::path::PathBuf; fn main() { let mut config = cc::Build::new(); - let root_path = Path::new("vendor/tree-sitter"); + let root_path: PathBuf = ["vendor", "tree-sitter"].iter().collect(); config - .flag("-std=c99") - .flag("-Wno-unused-parameter") - .include(root_path.join(Path::new("src"))) - .include(root_path.join(Path::new("include"))) - .include(root_path.join(Path::new("externals/utf8proc"))); + .flag_if_supported("-std=c99") + 
.flag_if_supported("-Wno-unused-parameter") + .include(root_path.join("src")) + .include(root_path.join("include")) + .include(root_path.join("externals").join("utf8proc")); let source_filenames = [ "get_changed_ranges.c", @@ -29,18 +29,19 @@ fn main() { config.files(source_filenames.iter().map(|source_filename| { root_path - .join(Path::new(&"src/runtime")) - .join(Path::new(&source_filename)) + .join("src") + .join("runtime") + .join(&source_filename) })); - config.file(root_path.join(Path::new("externals/utf8proc/utf8proc.c"))); + config.file(root_path.join("externals").join("utf8proc").join("utf8proc.c")); if env::var("RUST_TREE_SITTER_TEST").is_ok() { - let parser_dir = Path::new("fixtures/tree-sitter-rust/src"); + let parser_dir: PathBuf = ["fixtures", "tree-sitter-rust", "src"].iter().collect(); config .file(parser_dir.join("parser.c")) .file(parser_dir.join("scanner.c")); } - config.compile("treesitter_ffi"); + config.compile("tree-sitter-runtime"); } From 7748f8e1687042fc477890378fc653c152bc2b31 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:16:28 -0700 Subject: [PATCH 014/208] Fetch submodules on appveyor --- appveyor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 23fe3d97..22c8b96e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -4,11 +4,14 @@ environment: build: false install: + - git submodule update --init --recursive + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - rustup-init -yv --default-toolchain stable - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - rustc -vV - cargo -vV + - script\fetch-test-fixtures.cmd test_script: From 654789f92534b4fd6d59006a13353edc923da1cb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:27:36 -0700 Subject: [PATCH 015/208] Use UTF8PROC_STATIC macro --- build.rs | 1 + vendor/tree-sitter | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/build.rs b/build.rs index c1e768ff..7d9ee83e 100644 --- 
a/build.rs +++ b/build.rs @@ -8,6 +8,7 @@ fn main() { let root_path: PathBuf = ["vendor", "tree-sitter"].iter().collect(); config + .define("UTF8PROC_STATIC", "") .flag_if_supported("-std=c99") .flag_if_supported("-Wno-unused-parameter") .include(root_path.join("src")) diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 3c01382b..9c1e82a7 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 3c01382b95364ce40f0cf9856865a30af77f9690 +Subproject commit 9c1e82a7eac97767cee0469faa2722fd5753b065 From 993bfea669b1ba49fa4a37b11abd82c5206f0209 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:39:00 -0700 Subject: [PATCH 016/208] Add missing source file --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 7d9ee83e..2843c758 100644 --- a/build.rs +++ b/build.rs @@ -22,6 +22,7 @@ fn main() { "node.c", "parser.c", "stack.c", + "string_input.c", "subtree.c", "tree_cursor.c", "tree.c", From 4603542747743e0f0bb1361a8cdb3d4abbb089b0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:44:14 -0700 Subject: [PATCH 017/208] Add more public methods and tests --- src/lib.rs | 134 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 124 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fa1db0f9..ef53e4de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -253,15 +253,27 @@ impl<'a> Node<'a> { } } - pub fn name(&self) -> &'static str { + pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } - pub fn start_index(&self) -> u32 { + pub fn is_named(&self) -> bool { + unsafe { ffi::ts_node_is_named(self.0) } + } + + pub fn has_changes(&self) -> bool { + unsafe { ffi::ts_node_has_changes(self.0) } + } + + pub fn has_error(&self) -> bool { + unsafe { ffi::ts_node_has_error(self.0) } + } + + pub fn start_byte(&self) -> u32 { unsafe { ffi::ts_node_start_byte(self.0) } } - pub fn 
end_index(&self) -> u32 { + pub fn end_byte(&self) -> u32 { unsafe { ffi::ts_node_end_byte(self.0) } } @@ -289,10 +301,34 @@ impl<'a> Node<'a> { unsafe { ffi::ts_node_child_count(self.0) } } + pub fn named_child(&self, i: u32) -> Option { + Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) + } + + pub fn named_child_count(&self) -> u32 { + unsafe { ffi::ts_node_named_child_count(self.0) } + } + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } + pub fn next_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) + } + + pub fn prev_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) + } + + pub fn next_named_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) + } + + pub fn prev_named_sibling(&self) -> Option { + Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) + } + pub fn to_sexp(&self) -> String { let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); @@ -304,26 +340,26 @@ impl<'a> Node<'a> { extern "C" { fn free(pointer: *mut c_void); } impl<'a> TreeCursor<'a> { - fn node(&'a self) -> Node<'a> { + pub fn node(&'a self) -> Node<'a> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, ) } - fn goto_first_child(&mut self) -> bool { + pub fn goto_first_child(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; } - fn goto_parent(&mut self) -> bool { + pub fn goto_parent(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; } - fn goto_next_sibling(&mut self) -> bool { + pub fn goto_next_sibling(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; } - fn goto_first_child_for_index(&mut self, index: u32) -> Option { + pub fn goto_first_child_for_index(&mut self, index: u32) -> Option { let result = unsafe { 
ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) }; if result < 0 { None @@ -378,7 +414,7 @@ mod tests { ", None).unwrap(); let root_node = tree.root_node(); - assert_eq!(root_node.name(), "source_file"); + assert_eq!(root_node.kind(), "source_file"); assert_eq!( root_node.to_sexp(), @@ -386,7 +422,7 @@ mod tests { ); let struct_node = root_node.child(0).unwrap(); - assert_eq!(struct_node.name(), "struct_item"); + assert_eq!(struct_node.kind(), "struct_item"); } #[test] @@ -407,4 +443,82 @@ mod tests { assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); } + + #[test] + fn test_tree_cursor() { + let mut parser = Parser::new(); + parser.set_language(rust()); + + let tree = parser.parse_str(" + struct Stuff { + a: A; + b: Option, + } + ", None).unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct_item"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct"); + assert_eq!(cursor.node().is_named(), false); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration_list"); + assert_eq!(cursor.node().is_named(), true); + } + + #[test] + fn test_custom_utf8_input() { + struct LineBasedInput { + lines: &'static [&'static str], + row: usize, + column: usize, + } + + impl Utf8Input for LineBasedInput { + fn read(&mut self) -> &[u8] { + if self.row < self.lines.len() { + let result = &self.lines[self.row].as_bytes()[self.column..]; + self.row += 1; + self.column = 0; + result + } else { + &[] + } + } + + fn seek(&mut self, _byte: u32, position: Point) { + self.row = position.row as usize; + self.column = 
position.column as usize; + } + } + + let mut parser = Parser::new(); + parser.set_language(rust()); + + let mut input = LineBasedInput { + lines: &[ + "pub fn main() {", + "}", + ], + row: 0, + column: 0 + }; + + let tree = parser.parse_utf8(&mut input, None).unwrap(); + let root = tree.root_node(); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + + let child = root.child(0).unwrap(); + assert_eq!(child.kind(), "function_item"); + } } From e10a817704c3982b4ed41928b2b504cdbdbaf702 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 10:55:42 -0700 Subject: [PATCH 018/208] Switch back to default c compiler on travis --- .travis.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5b99d596..10fcfe94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,21 +4,11 @@ rust: - stable env: - - CC=clang-3.6 RUST_TREE_SITTER_TEST=1 + - RUST_TREE_SITTER_TEST=1 before_install: - ./script/fetch-test-fixtures.sh -compiler: clang-3.6 - -addons: - apt: - sources: - - llvm-toolchain-precise-3.6 - - ubuntu-toolchain-r-test - packages: - - clang-3.6 - branches: only: - master From 870dc11f791425f441eb6e84f86332f4a6b1a21a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 11:15:37 -0700 Subject: [PATCH 019/208] Implement Eq and Debug for Node --- src/lib.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ef53e4de..2ecc7341 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,11 @@ mod ffi; +use std::fmt; use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_int, c_void}; use std::ptr; -#[derive(Clone, Copy)] -pub struct Symbol(ffi::TSSymbol); - pub type Language = *const ffi::TSLanguage; pub trait Utf16Input { @@ -26,13 +24,13 @@ pub enum LogType { Lex, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, 
Eq)] pub struct Point { pub row: u32, pub column: u32, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { pub start_byte: u32, pub old_end_byte: u32, @@ -63,9 +61,19 @@ impl Parser { } } - pub fn set_language(&mut self, language: Language) { + pub fn set_language(&mut self, language: Language) -> Result<(), String> { unsafe { - ffi::ts_parser_set_language(self.0, language); + let version = ffi::ts_language_version(language) as usize; + if version == ffi::TREE_SITTER_LANGUAGE_VERSION { + ffi::ts_parser_set_language(self.0, language); + Ok(()) + } else { + Err(format!( + "Incompatible language version {}. Expected {}.", + version, + ffi::TREE_SITTER_LANGUAGE_VERSION + )) + } } } @@ -253,6 +261,10 @@ impl<'a> Node<'a> { } } + pub fn kind_id(&self) -> u16 { + unsafe { ffi::ts_node_symbol(self.0) } + } + pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() } @@ -330,6 +342,8 @@ impl<'a> Node<'a> { } pub fn to_sexp(&self) -> String { + extern "C" { fn free(pointer: *mut c_void); } + let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); unsafe { free(c_string as *mut c_void) }; @@ -337,7 +351,17 @@ impl<'a> Node<'a> { } } -extern "C" { fn free(pointer: *mut c_void); } +impl<'a> PartialEq for Node<'a> { + fn eq(&self, other: &Self) -> bool { + self.0.id == other.0.id + } +} + +impl<'a> fmt::Debug for Node<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{{Node {} {} - {}}}", self.kind(), self.start_position(), self.end_position()) + } +} impl<'a> TreeCursor<'a> { pub fn node(&'a self) -> Node<'a> { @@ -375,6 +399,12 @@ impl<'a> Drop for TreeCursor<'a> { } } +impl fmt::Display for Point { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "({}, {})", self.row, self.column) + } +} + impl Into for Point { fn into(self) 
-> ffi::TSPoint { ffi::TSPoint { @@ -406,7 +436,7 @@ mod tests { #[test] fn test_basic_parsing() { let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let tree = parser.parse_str(" struct Stuff {} @@ -428,7 +458,7 @@ mod tests { #[test] fn test_logging() { let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let mut messages = Vec::new(); parser.set_logger(Some(&mut |log_type, message| { @@ -447,7 +477,7 @@ mod tests { #[test] fn test_tree_cursor() { let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let tree = parser.parse_str(" struct Stuff { @@ -502,7 +532,7 @@ mod tests { } let mut parser = Parser::new(); - parser.set_language(rust()); + parser.set_language(rust()).unwrap(); let mut input = LineBasedInput { lines: &[ @@ -521,4 +551,16 @@ mod tests { let child = root.child(0).unwrap(); assert_eq!(child.kind(), "function_item"); } + + #[test] + fn test_node_equality() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str("struct A {}", None).unwrap(); + let node1 = tree.root_node(); + let node2 = tree.root_node(); + assert_eq!(node1, node2); + assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); + assert_ne!(node1.child(0).unwrap(), node2); + } } From a27ac49dea32cb296ff4ebdd939c7fa01a3d72e7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 11:42:13 -0700 Subject: [PATCH 020/208] Flesh out README --- README.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/README.md b/README.md index 40f5624f..43270713 100644 --- a/README.md +++ b/README.md @@ -6,4 +6,94 @@ Rust Tree-sitter Rust bindings to the [Tree-sitter][] parsing library. +### Basic Usage + +First, create a parser: + +```rust +let parser = Parser::new(); +``` + +Then assign a language to the parser. 
Tree-sitter languages consist of generated C code. To use them from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`: + +```rust +extern "C" fn tree_sitter_c() -> Language; +extern "C" fn tree_sitter_rust() -> Language; +extern "C" fn tree_sitter_javascript() -> Language; + +parser.set_language(unsafe { tree_sitter_rust() }).unwrap(); +``` + +Now you can parse source code: + +```rust +let source_code = "fn test() {}"; + +let tree = parser.parse_str(source_code, None); +let root_node = tree.root_node(); +assert_eq!(root_node.kind(), "source_file"); +assert_eq!(root_node.start_position().column, 0); +assert_eq!(root_node.end_position().column, 12); +``` + +### Editing + +Once you have a syntax tree, you can update it when your source code changes: + +```rust +let new_source_code = "fn test(a: u32) {}" + +tree.edit(InputEdit { + start_byte: 8, + old_end_byte: 8, + new_end_byte: 14, + start_position: Point::new(0, 8), + old_end_position: Point::new(0, 8), + new_end_position: Point::new(0, 14), +}); +let new_tree = parser.parse_str(new_source_code, Some(tree)); +``` + +### Text Input + + +The code can be provided either as a simple string or by any type that implements Tree-sitter's `Utf8Input` or `Utf16Input` traits: + +```rust +struct LineWiseInput { + lines: &'static [&'static str], + row: usize, + column: usize, +} + +impl tree_sitter::Utf8Input for LineWiseInput { + fn read(&mut self) -> &[u8] { + if self.row < self.lines.len() { + let result = &self.lines[self.row].as_bytes()[self.column..]; + self.row += 1; + self.column = 0; + result + } else { + &[] + } + } + + fn seek(&mut self, _byte: u32, position: Point) { + self.row = position.row as usize; + self.column = position.column as usize; + } +} + +let mut input = LineBasedInput { + lines: &[ + "pub fn main() {", + "}", + ], + row: 0, + column: 0 +}; + +let tree = parser.parse_utf8(&mut input, None).unwrap(); +``` + [tree-sitter]: https://github.com/tree-sitter/tree-sitter From 
c0b49e99357fbe25d62d800c9da2fd47566e9b31 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 11:51:46 -0700 Subject: [PATCH 021/208] Fix include globs in package manifest --- Cargo.toml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e20d40aa..560d9a71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,19 @@ [package] name = "tree-sitter" +description = "Rust bindings to the Tree-sitter parsing library" version = "0.1.0" authors = ["Max Brunsfeld "] build = "build.rs" -exclude = ["vendor/tree-sitter/**/*"] +license = "MIT" include = [ - "vendor/tree-sitter/src/runtime/*", - "vendor/tree-sitter/externals/utf8proc/utf8proc*" + "/build.rs", + "/Cargo.toml", + "/LICENSE", + "/README.md", + "/src/*", + "/vendor/tree-sitter/externals/utf8proc/utf8proc*", + "/vendor/tree-sitter/include/*", + "/vendor/tree-sitter/src/runtime/*", ] [build-dependencies] From e6d580597d5925f3d43bf01b2101d6e0ca9643fc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 12:02:52 -0700 Subject: [PATCH 022/208] Add crates.io badge to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 43270713..da6e1a80 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Rust Tree-sitter [![Build Status](https://travis-ci.org/tree-sitter/rust-tree-sitter.svg)](https://travis-ci.org/tree-sitter/rust-tree-sitter) [![Build status](https://ci.appveyor.com/api/projects/status/d0f6vqq3rflxx3y6/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/rust-tree-sitter/branch/master) +[![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) Rust bindings to the [Tree-sitter][] parsing library. 
From 819b14070123c4f6c61aa73c72654ce1b97fef16 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 14:06:49 -0700 Subject: [PATCH 023/208] Make set_logger take a boxed function --- src/lib.rs | 68 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2ecc7341..5ef80f70 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,8 @@ pub enum LogType { Lex, } +type Logger<'a> = Box; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Point { pub row: u32, @@ -44,7 +46,7 @@ pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); pub struct Parser(*mut ffi::TSParser); -pub struct Tree(*mut ffi::TSTree, ffi::TSInputEncoding); +pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); @@ -77,28 +79,42 @@ impl Parser { } } - pub fn set_logger ()>(&mut self, logger: Option<&mut F>) { - unsafe extern "C" fn log ()>( - payload: *mut c_void, - c_log_type: ffi::TSLogType, - c_message: *const c_char, - ) { - let callback = (payload as *mut F).as_mut().unwrap(); - if let Ok(message) = CStr::from_ptr(c_message).to_str() { - let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { - LogType::Parse - } else { - LogType::Lex - }; - callback(log_type, message); - } - }; + pub fn logger(&self) -> Option<&Logger> { + let logger = unsafe { ffi::ts_parser_logger(self.0) }; + unsafe { (logger.payload as *mut Logger).as_ref() } + } + + pub fn set_logger(&mut self, logger: Option) { + let prev_logger = unsafe { ffi::ts_parser_logger(self.0) }; + if !prev_logger.payload.is_null() { + unsafe { Box::from_raw(prev_logger.payload as *mut Logger) }; + } let c_logger; if let Some(logger) = logger { + let container = Box::new(logger); + + unsafe extern "C" fn log( + payload: *mut c_void, + c_log_type: ffi::TSLogType, + c_message: *const c_char, + ) { + let callback = (payload as *mut Logger).as_mut().unwrap(); + if let Ok(message) = 
CStr::from_ptr(c_message).to_str() { + let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + LogType::Parse + } else { + LogType::Lex + }; + callback(log_type, message); + } + }; + + let raw_container = Box::into_raw(container); + c_logger = ffi::TSLogger { - payload: logger as *mut F as *mut c_void, - log: Some(log::), + payload: raw_container as *mut c_void, + log: Some(log), }; } else { c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; @@ -156,7 +172,7 @@ impl Parser { if new_tree_ptr.is_null() { None } else { - Some(Tree(new_tree_ptr, ffi::TSInputEncoding_TSInputEncodingUTF8)) + Some(Tree(new_tree_ptr)) } } @@ -204,16 +220,14 @@ impl Parser { if new_tree_ptr.is_null() { None } else { - Some(Tree( - new_tree_ptr, - ffi::TSInputEncoding_TSInputEncodingUTF16, - )) + Some(Tree(new_tree_ptr)) } } } impl Drop for Parser { fn drop(&mut self) { + self.set_logger(None); unsafe { ffi::ts_parser_delete(self.0) } } } @@ -248,7 +262,7 @@ impl Drop for Tree { impl Clone for Tree { fn clone(&self) -> Tree { - unsafe { Tree(ffi::ts_tree_copy(self.0), self.1) } + unsafe { Tree(ffi::ts_tree_copy(self.0)) } } } @@ -461,9 +475,9 @@ mod tests { parser.set_language(rust()).unwrap(); let mut messages = Vec::new(); - parser.set_logger(Some(&mut |log_type, message| { + parser.set_logger(Some(Box::new(|log_type, message| { messages.push((log_type, message.to_string())); - })); + }))); parser.parse_str(" struct Stuff {} From 4da669ce8d23cbfaeaba2d2c5969b678779ff0e9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 14:27:08 -0700 Subject: [PATCH 024/208] Fix bugs in editing/reparsing --- README.md | 2 +- src/lib.rs | 101 ++++++++++++++++++++++++++++++++++++++++----- vendor/tree-sitter | 2 +- 3 files changed, 92 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index da6e1a80..d0806bbb 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ tree.edit(InputEdit { old_end_position: Point::new(0, 8), new_end_position: 
Point::new(0, 14), }); -let new_tree = parser.parse_str(new_source_code, Some(tree)); +let new_tree = parser.parse_str(new_source_code, Some(&tree)); ``` ### Text Input diff --git a/src/lib.rs b/src/lib.rs index 5ef80f70..0ac1300e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -123,7 +123,7 @@ impl Parser { unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; } - pub fn parse_str(&mut self, input: &str, old_tree: Option) -> Option { + pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { let mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; self.parse_utf8(&mut input, old_tree) } @@ -131,7 +131,7 @@ impl Parser { pub fn parse_utf8( &mut self, input: &mut T, - old_tree: Option, + old_tree: Option<&Tree>, ) -> Option { unsafe extern "C" fn read( payload: *mut c_void, @@ -179,7 +179,7 @@ impl Parser { pub fn parse_utf16( &mut self, input: &mut T, - old_tree: Option, + old_tree: Option<&Tree>, ) -> Option { unsafe extern "C" fn read( payload: *mut c_void, @@ -266,7 +266,7 @@ impl Clone for Tree { } } -impl<'a> Node<'a> { +impl<'tree> Node<'tree> { fn new(node: ffi::TSNode) -> Option { if node.id.is_null() { None @@ -319,7 +319,7 @@ impl<'a> Node<'a> { } } - pub fn child(&self, i: u32) -> Option { + pub fn child(&self, i: u32) -> Option { Self::new(unsafe { ffi::ts_node_child(self.0, i) }) } @@ -327,7 +327,7 @@ impl<'a> Node<'a> { unsafe { ffi::ts_node_child_count(self.0) } } - pub fn named_child(&self, i: u32) -> Option { + pub fn named_child<'a>(&'a self, i: u32) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) } @@ -335,23 +335,23 @@ impl<'a> Node<'a> { unsafe { ffi::ts_node_named_child_count(self.0) } } - pub fn parent(&self) -> Option { + pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } - pub fn next_sibling(&self) -> Option { + pub fn next_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) } - pub fn prev_sibling(&self) -> Option { + pub fn 
prev_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) } - pub fn next_named_sibling(&self) -> Option { + pub fn next_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) } - pub fn prev_named_sibling(&self) -> Option { + pub fn prev_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) } @@ -413,6 +413,12 @@ impl<'a> Drop for TreeCursor<'a> { } } +impl Point { + pub fn new(row: u32, column: u32) -> Self { + Point { row, column } + } +} + impl fmt::Display for Point { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "({}, {})", self.row, self.column) @@ -577,4 +583,77 @@ mod tests { assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); assert_ne!(node1.child(0).unwrap(), node2); } + + #[test] + fn test_editing() { + struct SpyInput { + bytes: &'static [u8], + offset: usize, + bytes_read: Vec, + } + + impl Utf8Input for SpyInput { + fn read(&mut self) -> &[u8] { + if self.offset < self.bytes.len() { + let result = &self.bytes[self.offset..self.offset + 1]; + self.bytes_read.extend(result.iter()); + self.offset += 1; + result + } else { + &[] + } + } + + fn seek(&mut self, byte: u32, _position: Point) { + self.offset = byte as usize; + } + } + + let mut input = SpyInput { + bytes: "fn test(a: A, c: C) {}".as_bytes(), + offset: 0, + bytes_read: Vec::new(), + }; + + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let mut tree = parser.parse_utf8(&mut input, None).unwrap(); + let parameters_sexp = tree.root_node() + .named_child(0).unwrap() + .named_child(1).unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + input.offset = 0; + input.bytes_read.clear(); + input.bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); + tree.edit(&InputEdit{ + start_byte: 14, + old_end_byte: 14, + 
new_end_byte: 20, + start_position: Point::new(0, 14), + old_end_position: Point::new(0, 14), + new_end_position: Point::new(0, 20), + }); + + let tree = parser.parse_utf8(&mut input, Some(&tree)).unwrap(); + let parameters_sexp = tree.root_node() + .named_child(0).unwrap() + .named_child(1).unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + let retokenized_content = String::from_utf8(input.bytes_read).unwrap(); + assert!(retokenized_content.contains("b: B")); + assert!(!retokenized_content.contains("a: A")); + assert!(!retokenized_content.contains("c: C")); + assert!(!retokenized_content.contains("{}")); + } } diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 9c1e82a7..78f28b14 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 9c1e82a7eac97767cee0469faa2722fd5753b065 +Subproject commit 78f28b14ce519ba085ab7886c2fc19739f7f7da0 From 45660e7b4e5db579905924717fa4da22f6a1d97d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 14:27:08 -0700 Subject: [PATCH 025/208] Make syntax trees implement Send --- src/lib.rs | 107 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0ac1300e..6084516c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -254,6 +254,14 @@ impl Tree { } } +unsafe impl Send for Tree {} + +impl fmt::Debug for Tree { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{{Tree {:?}}}", self.root_node()) + } +} + impl Drop for Tree { fn drop(&mut self) { unsafe { ffi::ts_tree_delete(self.0) } @@ -448,6 +456,7 @@ impl<'a> Utf8Input for FlatInput<'a> { #[cfg(test)] mod tests { + use std::thread; use super::*; fn rust() -> Language { unsafe { tree_sitter_rust() } } @@ -586,29 +595,6 @@ mod tests { #[test] fn test_editing() { - 
struct SpyInput { - bytes: &'static [u8], - offset: usize, - bytes_read: Vec, - } - - impl Utf8Input for SpyInput { - fn read(&mut self) -> &[u8] { - if self.offset < self.bytes.len() { - let result = &self.bytes[self.offset..self.offset + 1]; - self.bytes_read.extend(result.iter()); - self.offset += 1; - result - } else { - &[] - } - } - - fn seek(&mut self, byte: u32, _position: Point) { - self.offset = byte as usize; - } - } - let mut input = SpyInput { bytes: "fn test(a: A, c: C) {}".as_bytes(), offset: 0, @@ -656,4 +642,79 @@ mod tests { assert!(!retokenized_content.contains("c: C")); assert!(!retokenized_content.contains("{}")); } + + #[test] + fn test_parallel_parsing() { + // Parse this source file so that each thread has a non-trivial amount of + // work to do. + let this_file_source = include_str!("lib.rs"); + + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str(this_file_source, None).unwrap(); + + let mut parse_threads = Vec::new(); + for thread_id in 1..5 { + let mut tree_clone = tree.clone(); + parse_threads.push(thread::spawn(move || { + + // For each thread, prepend a different number of declarations to the + // source code. + let mut prepend_line_count = 0; + let mut prepended_source = String::new(); + for _ in 0..thread_id { + prepend_line_count += 2; + prepended_source += "struct X {}\n\n"; + } + + tree_clone.edit(&InputEdit{ + start_byte: 0, + old_end_byte: 0, + new_end_byte: prepended_source.len() as u32, + start_position: Point::new(0, 0), + old_end_position: Point::new(0, 0), + new_end_position: Point::new(prepend_line_count, 0), + }); + prepended_source += this_file_source; + + // Reparse using the old tree as a starting point. + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + parser.parse_str(&prepended_source, Some(&tree_clone)).unwrap() + })); + } + + // Check that the trees have the expected relationship to one another. 
+ let trees = parse_threads + .into_iter() + .map(|thread| thread.join().unwrap()); + let child_count_differences = trees + .map(|t| t.root_node().child_count() - tree.root_node().child_count()) + .collect::>(); + + assert_eq!(child_count_differences, &[1, 2, 3, 4]); + } + + struct SpyInput { + bytes: &'static [u8], + offset: usize, + bytes_read: Vec, + } + + impl Utf8Input for SpyInput { + fn read(&mut self) -> &[u8] { + if self.offset < self.bytes.len() { + let result = &self.bytes[self.offset..self.offset + 1]; + self.bytes_read.extend(result.iter()); + self.offset += 1; + result + } else { + &[] + } + } + + fn seek(&mut self, byte: u32, _position: Point) { + self.offset = byte as usize; + } + } } From 0034fce8093374bc5193727c96d45d98b9816a32 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 15:05:31 -0700 Subject: [PATCH 026/208] Add some fields to the cargo manifest --- Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 560d9a71..13c84759 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,10 @@ version = "0.1.0" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" +readme = "README.md" +keywords = ["incremental", "parsing"] +categories = ["parsing", "text editors", "api bindings"] + include = [ "/build.rs", "/Cargo.toml", From 16a7366ec75f5c03d497a12bb796d883bfd32466 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 May 2018 15:06:05 -0700 Subject: [PATCH 027/208] 0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 13c84759..12d92923 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.1.0" +version = "0.1.1" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From 5efc28f2f3741e9f3b1ff376be5de2890df80ed0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Jun 2018 16:19:37 -0700 
Subject: [PATCH 028/208] Update to latest tree-sitter API --- README.md | 66 ++++++----- build.rs | 1 - src/bindings.rs | 22 +++- src/lib.rs | 273 +++++++++++++++++++++------------------------ vendor/tree-sitter | 2 +- 5 files changed, 175 insertions(+), 189 deletions(-) diff --git a/README.md b/README.md index d0806bbb..ff7140c5 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,10 @@ Rust bindings to the [Tree-sitter][] parsing library. First, create a parser: ```rust +use tree_sitter::{Parser, Language}; + +// ... + let parser = Parser::new(); ``` @@ -22,16 +26,17 @@ extern "C" fn tree_sitter_c() -> Language; extern "C" fn tree_sitter_rust() -> Language; extern "C" fn tree_sitter_javascript() -> Language; -parser.set_language(unsafe { tree_sitter_rust() }).unwrap(); +let language = unsafe { tree_sitter_rust() }; +parser.set_language(language).unwrap(); ``` Now you can parse source code: ```rust let source_code = "fn test() {}"; - let tree = parser.parse_str(source_code, None); let root_node = tree.root_node(); + assert_eq!(root_node.kind(), "source_file"); assert_eq!(root_node.start_position().column, 0); assert_eq!(root_node.end_position().column, 12); @@ -39,7 +44,7 @@ assert_eq!(root_node.end_position().column, 12); ### Editing -Once you have a syntax tree, you can update it when your source code changes: +Once you have a syntax tree, you can update it when your source code changes. 
Passing in the previous edited tree makes `parse` run much more quickly: ```rust let new_source_code = "fn test(a: u32) {}" @@ -52,49 +57,42 @@ tree.edit(InputEdit { old_end_position: Point::new(0, 8), new_end_position: Point::new(0, 14), }); + let new_tree = parser.parse_str(new_source_code, Some(&tree)); ``` ### Text Input - -The code can be provided either as a simple string or by any type that implements Tree-sitter's `Utf8Input` or `Utf16Input` traits: +The source code to parse can be provided either as a string or as a function that returns text encoded as either UTF8 or UTF16: ```rust -struct LineWiseInput { - lines: &'static [&'static str], - row: usize, - column: usize, -} +// Store some source code in an array of lines. +let lines = &[ + "pub fn foo() {", + " 1", + "}", +]; -impl tree_sitter::Utf8Input for LineWiseInput { - fn read(&mut self) -> &[u8] { - if self.row < self.lines.len() { - let result = &self.lines[self.row].as_bytes()[self.column..]; - self.row += 1; - self.column = 0; - result +// Parse the source code using a custom callback. The callback is called +// with both a byte offset and a row/column offset. +let tree = parser.parse_utf8(&mut |_byte: u32, position: Point| -> &[u8] { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
} else { - &[] + "\n".as_bytes() } + } else { + &[] } +}, None).unwrap(); - fn seek(&mut self, _byte: u32, position: Point) { - self.row = position.row as usize; - self.column = position.column as usize; - } -} - -let mut input = LineBasedInput { - lines: &[ - "pub fn main() {", - "}", - ], - row: 0, - column: 0 -}; - -let tree = parser.parse_utf8(&mut input, None).unwrap(); +assert_eq!( + tree.root_node().to_sexp(), + "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))" +); ``` [tree-sitter]: https://github.com/tree-sitter/tree-sitter diff --git a/build.rs b/build.rs index 2843c758..7d9ee83e 100644 --- a/build.rs +++ b/build.rs @@ -22,7 +22,6 @@ fn main() { "node.c", "parser.c", "stack.c", - "string_input.c", "subtree.c", "tree_cursor.c", "tree.c", diff --git a/src/bindings.rs b/src/bindings.rs index 1ab49bde..b2d83729 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -41,15 +41,12 @@ pub struct TSRange { pub struct TSInput { pub payload: *mut ::std::os::raw::c_void, pub read: ::std::option::Option< - unsafe extern "C" fn(payload: *mut ::std::os::raw::c_void, bytes_read: *mut u32) - -> *const ::std::os::raw::c_char, - >, - pub seek: ::std::option::Option< unsafe extern "C" fn( payload: *mut ::std::os::raw::c_void, byte_index: u32, position: TSPoint, - ) -> ::std::os::raw::c_int, + bytes_read: *mut u32, + ) -> *const ::std::os::raw::c_char, >, pub encoding: TSInputEncoding, } @@ -127,6 +124,21 @@ extern "C" { arg4: u32, ) -> *mut TSTree; } +extern "C" { + pub fn ts_parser_enabled(arg1: *const TSParser) -> bool; +} +extern "C" { + pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); +} +extern "C" { + pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize; +} +extern "C" { + pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize); +} +extern "C" { + pub fn ts_parser_reset(arg1: *mut TSParser); +} extern "C" { pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; } diff 
--git a/src/lib.rs b/src/lib.rs index 6084516c..84d51f04 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,21 +3,11 @@ mod ffi; use std::fmt; use std::ffi::CStr; use std::marker::PhantomData; -use std::os::raw::{c_char, c_int, c_void}; +use std::os::raw::{c_char, c_void}; use std::ptr; pub type Language = *const ffi::TSLanguage; -pub trait Utf16Input { - fn read(&mut self) -> &[u16]; - fn seek(&mut self, u32, Point); -} - -pub trait Utf8Input { - fn read(&mut self) -> &[u8]; - fn seek(&mut self, u32, Point); -} - #[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, @@ -50,11 +40,6 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -struct FlatInput<'a> { - bytes: &'a [u8], - offset: usize, -} - impl Parser { pub fn new() -> Parser { unsafe { @@ -124,105 +109,86 @@ impl Parser { } pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { - let mut input = FlatInput { bytes: input.as_bytes(), offset: 0}; - self.parse_utf8(&mut input, old_tree) + let bytes = input.as_bytes(); + self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree) } - pub fn parse_utf8( + pub fn parse_utf8<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read( + unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let result = input.read(); + let result = (*input)(byte_offset, position.into()); *bytes_read = result.len() as u32; return result.as_ptr() as *const c_char; }; - unsafe extern "C" fn seek( - payload: *mut c_void, - byte: u32, - position: ffi::TSPoint, - ) -> c_int { - let input = (payload as *mut T).as_mut().unwrap(); - input.seek( - byte, - Point { - row: position.row, - column: position.column, - }, - ); - return 1; - }; - let c_input = 
ffi::TSInput { payload: input as *mut T as *mut c_void, - read: Some(read::), - seek: Some(seek::), + read: Some(read::<'a, T>), encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, }; - let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); - let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; - if new_tree_ptr.is_null() { + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { None } else { - Some(Tree(new_tree_ptr)) + Some(Tree(c_new_tree)) } } - pub fn parse_utf16( + pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read( + unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let result = input.read(); + let result = (*input)(byte_offset, Point { + row: position.row, + column: position.column / 2, + }); *bytes_read = result.len() as u32 * 2; return result.as_ptr() as *const c_char; }; - unsafe extern "C" fn seek( - payload: *mut c_void, - byte: u32, - position: ffi::TSPoint, - ) -> c_int { - let input = (payload as *mut T).as_mut().unwrap(); - input.seek( - byte / 2, - Point { - row: position.row, - column: position.column / 2, - }, - ); - return 1; - }; - let c_input = ffi::TSInput { payload: input as *mut T as *mut c_void, - read: Some(read::), - seek: Some(seek::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + read: Some(read::<'a, T>), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, }; - let old_tree_ptr = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); - let new_tree_ptr = unsafe { ffi::ts_parser_parse(self.0, old_tree_ptr, c_input) }; - if 
new_tree_ptr.is_null() { + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { None } else { - Some(Tree(new_tree_ptr)) + Some(Tree(c_new_tree)) } } + + pub fn reset(&mut self) { + unsafe { ffi::ts_parser_reset(self.0) } + } + + pub fn set_operation_limit(&mut self, limit: usize) { + unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } + } } impl Drop for Parser { @@ -442,15 +408,12 @@ impl Into for Point { } } -impl<'a> Utf8Input for FlatInput<'a> { - fn read(&mut self) -> &[u8] { - let result = &self.bytes[self.offset..]; - self.offset = self.bytes.len(); - result - } - - fn seek(&mut self, offset: u32, _position: Point) { - self.offset = offset as usize; +impl From for Point { + fn from(point: ffi::TSPoint) -> Self { + Self { + row: point.row, + column: point.column, + } } } @@ -536,49 +499,70 @@ mod tests { #[test] fn test_custom_utf8_input() { - struct LineBasedInput { - lines: &'static [&'static str], - row: usize, - column: usize, - } - - impl Utf8Input for LineBasedInput { - fn read(&mut self) -> &[u8] { - if self.row < self.lines.len() { - let result = &self.lines[self.row].as_bytes()[self.column..]; - self.row += 1; - self.column = 0; - result - } else { - &[] - } - } - - fn seek(&mut self, _byte: u32, position: Point) { - self.row = position.row as usize; - self.column = position.column as usize; - } - } - let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let mut input = LineBasedInput { - lines: &[ - "pub fn main() {", - "}", - ], - row: 0, - column: 0 - }; + let lines = &[ + "pub fn foo() {", + " 1", + "}", + ]; + + let tree = parser.parse_utf8(&mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
+ } else { + "\n".as_bytes() + } + } else { + &[] + } + }, None).unwrap(); - let tree = parser.parse_utf8(&mut input, None).unwrap(); let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); + } - let child = root.child(0).unwrap(); - assert_eq!(child.kind(), "function_item"); + #[test] + fn test_custom_utf16_input() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + parser.set_logger(Some(Box::new(|t, message| { + println!("log: {:?} {}", t, message); + }))); + + let lines: Vec> = [ + "pub fn foo() {", + " 1", + "}" + ].iter().map(|s| s.encode_utf16().collect()).collect(); + + let tree = parser.parse_utf16(&mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].len() { + &lines[row][column..] 
+ } else { + &[10] + } + } else { + &[] + } + }, None).unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); } #[test] @@ -595,16 +579,23 @@ mod tests { #[test] fn test_editing() { - let mut input = SpyInput { - bytes: "fn test(a: A, c: C) {}".as_bytes(), - offset: 0, - bytes_read: Vec::new(), - }; - let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let mut tree = parser.parse_utf8(&mut input, None).unwrap(); + let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); + let mut input_bytes_read = Vec::new(); + + let mut tree = parser.parse_utf8(&mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, None).unwrap(); + let parameters_sexp = tree.root_node() .named_child(0).unwrap() .named_child(1).unwrap() @@ -614,9 +605,8 @@ mod tests { "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" ); - input.offset = 0; - input.bytes_read.clear(); - input.bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); + input_bytes_read.clear(); + input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); tree.edit(&InputEdit{ start_byte: 14, old_end_byte: 14, @@ -626,7 +616,17 @@ mod tests { new_end_position: Point::new(0, 20), }); - let tree = parser.parse_utf8(&mut input, Some(&tree)).unwrap(); + let tree = parser.parse_utf8(&mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, Some(&tree)).unwrap(); + let parameters_sexp = tree.root_node() 
.named_child(0).unwrap() .named_child(1).unwrap() @@ -636,7 +636,7 @@ mod tests { "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" ); - let retokenized_content = String::from_utf8(input.bytes_read).unwrap(); + let retokenized_content = String::from_utf8(input_bytes_read).unwrap(); assert!(retokenized_content.contains("b: B")); assert!(!retokenized_content.contains("a: A")); assert!(!retokenized_content.contains("c: C")); @@ -694,27 +694,4 @@ mod tests { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } - - struct SpyInput { - bytes: &'static [u8], - offset: usize, - bytes_read: Vec, - } - - impl Utf8Input for SpyInput { - fn read(&mut self) -> &[u8] { - if self.offset < self.bytes.len() { - let result = &self.bytes[self.offset..self.offset + 1]; - self.bytes_read.extend(result.iter()); - self.offset += 1; - result - } else { - &[] - } - } - - fn seek(&mut self, byte: u32, _position: Point) { - self.offset = byte as usize; - } - } } diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 78f28b14..26ab57a6 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 78f28b14ce519ba085ab7886c2fc19739f7f7da0 +Subproject commit 26ab57a6562aaeb48b579e3ca29eb064925e857c From 86c8206e35757694d37d3fe627236d22a75eb3ec Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 Jun 2018 16:20:58 -0700 Subject: [PATCH 029/208] 0.2.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 12d92923..bfc6b2e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.1.1" +version = "0.2.0" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From 2eff3225bac3422b19fc442482eb45f0462fa478 Mon Sep 17 00:00:00 2001 From: Stephan Renatus Date: Thu, 28 Jun 2018 10:25:01 +0200 Subject: 
[PATCH 030/208] README.md: small fixes To call .set_language on parser, it needs to be mut; also, the syntax for the extern "C" blocks seemed to be a bit off. Both now corresponds to what's in the tests. Signed-off-by: Stephan Renatus --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ff7140c5..449c6c46 100644 --- a/README.md +++ b/README.md @@ -16,15 +16,15 @@ use tree_sitter::{Parser, Language}; // ... -let parser = Parser::new(); +let mut parser = Parser::new(); ``` Then assign a language to the parser. Tree-sitter languages consist of generated C code. To use them from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`: ```rust -extern "C" fn tree_sitter_c() -> Language; -extern "C" fn tree_sitter_rust() -> Language; -extern "C" fn tree_sitter_javascript() -> Language; +extern "C" { fn tree_sitter_c() -> Language; } +extern "C" { fn tree_sitter_rust() -> Language; } +extern "C" { fn tree_sitter_javascript() -> Language; } let language = unsafe { tree_sitter_rust() }; parser.set_language(language).unwrap(); From c477e45fccf746fcb9335ba777ace035a6292a48 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:32:22 -0700 Subject: [PATCH 031/208] Update to the latest Tree-sitter --- src/bindings.rs | 30 +++++++++++++++++++++++++----- src/lib.rs | 6 +++++- vendor/tree-sitter | 2 +- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/bindings.rs b/src/bindings.rs index b2d83729..58d0e510 100644 --- a/src/bindings.rs +++ b/src/bindings.rs @@ -33,8 +33,10 @@ pub struct TSPoint { #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSRange { - pub start: TSPoint, - pub end: TSPoint, + pub start_point: TSPoint, + pub end_point: TSPoint, + pub start_byte: u32, + pub end_byte: u32, } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -80,7 +82,7 @@ pub struct TSInputEdit { pub struct TSNode { pub context: [u32; 4usize], pub id: *const 
::std::os::raw::c_void, - pub tree: *const ::std::os::raw::c_void, + pub tree: *const TSTree, } #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -139,6 +141,12 @@ extern "C" { extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); } +extern "C" { + pub fn ts_parser_set_included_ranges(arg1: *mut TSParser, arg2: *const TSRange, arg3: u32); +} +extern "C" { + pub fn ts_parser_included_ranges(arg1: *const TSParser, arg2: *mut u32) -> *const TSRange; +} extern "C" { pub fn ts_tree_copy(arg1: *const TSTree) -> *mut TSTree; } @@ -161,6 +169,9 @@ extern "C" { extern "C" { pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); } +extern "C" { + pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; +} extern "C" { pub fn ts_node_start_byte(arg1: TSNode) -> u32; } @@ -251,7 +262,10 @@ extern "C" { ) -> TSNode; } extern "C" { - pub fn ts_tree_cursor_new(arg1: *const TSTree) -> TSTreeCursor; + pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); +} +extern "C" { + pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; } extern "C" { pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); @@ -280,6 +294,12 @@ extern "C" { arg2: TSSymbol, ) -> *const ::std::os::raw::c_char; } +extern "C" { + pub fn ts_language_symbol_for_name( + arg1: *const TSLanguage, + arg2: *const ::std::os::raw::c_char, + ) -> TSSymbol; +} extern "C" { pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } @@ -287,4 +307,4 @@ extern "C" { pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 8; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9; diff --git a/src/lib.rs b/src/lib.rs index 84d51f04..9f0ef9b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,7 +216,7 @@ impl Tree { } pub fn walk(&self) -> TreeCursor { - TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) + self.root_node().walk() } } @@ -337,6 +337,10 @@ impl<'tree> Node<'tree> { unsafe { 
free(c_string as *mut c_void) }; result } + + pub fn walk(&self) -> TreeCursor<'tree> { + TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) + } } impl<'a> PartialEq for Node<'a> { diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 26ab57a6..16376c43 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 +1 @@ -Subproject commit 26ab57a6562aaeb48b579e3ca29eb064925e857c +Subproject commit 16376c43f5cc75bbc5297e6d5716bd94d55ccc05 From 47a7430da319b8e2a55cdb8998acc3f3f099a1c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:32:56 -0700 Subject: [PATCH 032/208] 0.3.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bfc6b2e2..746d2d47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.2.0" +version = "0.3.0" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From bdd52376a82ae2354b6226d9bb3b23649b81df4d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:36:12 -0700 Subject: [PATCH 033/208] Fix cargo category slugs --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 746d2d47..c2d733f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ build = "build.rs" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing"] -categories = ["parsing", "text editors", "api bindings"] +categories = ["api-bindings", "parsing", "text-editors"] include = [ "/build.rs", From 5fbb261316737117c827db935e667bcfd3932348 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 20 Jul 2018 13:36:42 -0700 Subject: [PATCH 034/208] 0.3.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c2d733f2..9adbcfd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" 
description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.0" +version = "0.3.1" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From c8125ec617ec4a3e2d93c460bcc22c89f1c06981 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 11:32:40 -0700 Subject: [PATCH 035/208] Make Language send + sync, add language methods --- src/lib.rs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9f0ef9b9..434d05fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,8 @@ use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; -pub type Language = *const ffi::TSLanguage; +#[repr(transparent)] +pub struct Language (*const ffi::TSLanguage); #[derive(Debug, PartialEq, Eq)] pub enum LogType { @@ -50,9 +51,9 @@ impl Parser { pub fn set_language(&mut self, language: Language) -> Result<(), String> { unsafe { - let version = ffi::ts_language_version(language) as usize; + let version = ffi::ts_language_version(language.0) as usize; if version == ffi::TREE_SITTER_LANGUAGE_VERSION { - ffi::ts_parser_set_language(self.0, language); + ffi::ts_parser_set_language(self.0, language.0); Ok(()) } else { Err(format!( @@ -222,6 +223,24 @@ impl Tree { unsafe impl Send for Tree {} +impl Language { + pub fn node_kind_count(&self) -> usize { + unsafe { ffi::ts_language_symbol_count(self.0) as usize } + } + + pub fn node_kind_for_id(&self, id: u16) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) }.to_str().unwrap() + } + + pub fn node_kind_is_named(&self, id: u16) -> bool { + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + } +} + +unsafe impl Send for Language {} + +unsafe impl Sync for Language {} + impl fmt::Debug for Tree { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{{Tree {:?}}}", self.root_node()) @@ -527,7 +546,7 @@ mod tests { }, 
None).unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); @@ -563,7 +582,7 @@ mod tests { }, None).unwrap(); let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); assert_eq!(root.kind(), "source_file"); assert_eq!(root.has_error(), false); assert_eq!(root.child(0).unwrap().kind(), "function_item"); From 0c2e1c189b2c4f696a1a1b48ee1ad04c7ef49936 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 22:32:58 -0700 Subject: [PATCH 036/208] Implement Clone for Language --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 434d05fb..81b4d09a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +#[derive(Clone, Copy)] #[repr(transparent)] pub struct Language (*const ffi::TSLanguage); From 572e8c202e36c98e875a67f2edadbbad341602cf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 22:33:11 -0700 Subject: [PATCH 037/208] Implement Send for Parser --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 81b4d09a..c547974b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,6 +200,8 @@ impl Drop for Parser { } } +unsafe impl Send for Parser {} + impl Tree { pub fn root_node(&self) -> Node { Node::new(unsafe { ffi::ts_tree_root_node(self.0) }).unwrap() From 
91d35dec7d4ddf60054efbbc6631489af74c09f0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Oct 2018 22:33:43 -0700 Subject: [PATCH 038/208] Add Parser.parser_utf8_io() method --- src/lib.rs | 159 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 107 insertions(+), 52 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c547974b..ff272a29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +use std::io::{self, Read, Seek}; #[derive(Clone, Copy)] #[repr(transparent)] @@ -115,37 +116,15 @@ impl Parser { self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree) } - pub fn parse_utf8<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( + pub fn parse_utf8<'a, T: FnMut(u32, Point) -> &'a [u8]>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read<'a, T: 'a + FnMut(u32, Point) -> &'a [u8]>( - payload: *mut c_void, - byte_offset: u32, - position: ffi::TSPoint, - bytes_read: *mut u32, - ) -> *const c_char { - let input = (payload as *mut T).as_mut().unwrap(); - let result = (*input)(byte_offset, position.into()); - *bytes_read = result.len() as u32; - return result.as_ptr() as *const c_char; - }; - - let c_input = ffi::TSInput { - payload: input as *mut T as *mut c_void, - read: Some(read::<'a, T>), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, - }; - - let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); - - let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; - if c_new_tree.is_null() { - None - } else { - Some(Tree(c_new_tree)) - } + self.parse_utf8_ptr(&mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, old_tree) } pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( @@ -153,34 +132,43 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read<'a, T: 'a + 
FnMut(u32, Point) -> &'a [u16]>( - payload: *mut c_void, - byte_offset: u32, - position: ffi::TSPoint, - bytes_read: *mut u32, - ) -> *const c_char { - let input = (payload as *mut T).as_mut().unwrap(); - let result = (*input)(byte_offset, Point { - row: position.row, - column: position.column / 2, - }); - *bytes_read = result.len() as u32 * 2; - return result.as_ptr() as *const c_char; - }; + self.parse_utf16_ptr(&mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, old_tree) + } - let c_input = ffi::TSInput { - payload: input as *mut T as *mut c_void, - read: Some(read::<'a, T>), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, - }; + pub fn parse_utf8_io( + &mut self, + mut input: impl Read + Seek, + old_tree: Option<&Tree>, + ) -> io::Result> { + let mut error = None; + let mut current_offset = 0; + let mut buffer = [0; 10 * 1024]; + let result = self.parse_utf8_ptr(&mut |byte, _| { + if byte as u64 != current_offset { + current_offset = byte as u64; + if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { + error = Some(e); + return (ptr::null(), 0) + } + } - let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + match input.read(&mut buffer) { + Err(e) => { + error = Some(e); + (ptr::null(), 0) + }, + Ok(length) => { + (buffer.as_ptr(), length) + } + } + }, old_tree); - let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; - if c_new_tree.is_null() { - None - } else { - Some(Tree(c_new_tree)) + match error { + Some(e) => Err(e), + None => Ok(result) } } @@ -191,6 +179,73 @@ impl Parser { pub fn set_operation_limit(&mut self, limit: usize) { unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } } + + fn parse_utf8_ptr (*const u8, usize)>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + unsafe extern "C" fn read (*const u8, usize)> ( + payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, + bytes_read: *mut u32, + ) -> 
*const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let (ptr, length) = (*input)(byte_offset, position.into()); + *bytes_read = length as u32; + return ptr as *const c_char; + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + }; + + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { + None + } else { + Some(Tree(c_new_tree)) + } + } + + fn parse_utf16_ptr (*const u16, usize)>( + &mut self, + input: &mut T, + old_tree: Option<&Tree>, + ) -> Option { + unsafe extern "C" fn read (*const u16, usize)>( + payload: *mut c_void, + byte_offset: u32, + position: ffi::TSPoint, + bytes_read: *mut u32, + ) -> *const c_char { + let input = (payload as *mut T).as_mut().unwrap(); + let (ptr, length) = (*input)(byte_offset, Point { + row: position.row, + column: position.column / 2, + }); + *bytes_read = length as u32 * 2; + ptr as *const c_char + }; + + let c_input = ffi::TSInput { + payload: input as *mut T as *mut c_void, + read: Some(read::), + encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, + }; + + let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0); + let c_new_tree = unsafe { ffi::ts_parser_parse(self.0, c_old_tree, c_input) }; + if c_new_tree.is_null() { + None + } else { + Some(Tree(c_new_tree)) + } + } } impl Drop for Parser { From a8cbde6dbfbc8ae9b7b37075ad0dffeed3e079b8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 9 Oct 2018 08:23:02 -0700 Subject: [PATCH 039/208] Run rustfmt on lib.rs --- src/lib.rs | 336 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 204 insertions(+), 132 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ff272a29..4a132a3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,15 @@ mod ffi; -use std::fmt; use std::ffi::CStr; +use std::fmt; +use std::io::{self, 
Read, Seek}; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; -use std::io::{self, Read, Seek}; #[derive(Clone, Copy)] #[repr(transparent)] -pub struct Language (*const ffi::TSLanguage); +pub struct Language(*const ffi::TSLanguage); #[derive(Debug, PartialEq, Eq)] pub enum LogType { @@ -43,6 +43,26 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +impl Language { + pub fn node_kind_count(&self) -> usize { + unsafe { ffi::ts_language_symbol_count(self.0) as usize } + } + + pub fn node_kind_for_id(&self, id: u16) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) } + .to_str() + .unwrap() + } + + pub fn node_kind_is_named(&self, id: u16) -> bool { + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + } +} + +unsafe impl Send for Language {} + +unsafe impl Sync for Language {} + impl Parser { pub fn new() -> Parser { unsafe { @@ -105,7 +125,10 @@ impl Parser { log: Some(log), }; } else { - c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None }; + c_logger = ffi::TSLogger { + payload: ptr::null_mut(), + log: None, + }; } unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; @@ -121,10 +144,13 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - self.parse_utf8_ptr(&mut |byte, position| { - let slice = input(byte, position); - (slice.as_ptr(), slice.len()) - }, old_tree) + self.parse_utf8_ptr( + &mut |byte, position| { + let slice = input(byte, position); + (slice.as_ptr(), slice.len()) + }, + old_tree, + ) } pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( @@ -132,10 +158,13 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - self.parse_utf16_ptr(&mut |byte, position| { - let slice = input(byte, position); - (slice.as_ptr(), slice.len()) - }, old_tree) + self.parse_utf16_ptr( + &mut |byte, position| { + let slice = input(byte, position); + 
(slice.as_ptr(), slice.len()) + }, + old_tree, + ) } pub fn parse_utf8_io( @@ -146,29 +175,30 @@ impl Parser { let mut error = None; let mut current_offset = 0; let mut buffer = [0; 10 * 1024]; - let result = self.parse_utf8_ptr(&mut |byte, _| { - if byte as u64 != current_offset { - current_offset = byte as u64; - if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { - error = Some(e); - return (ptr::null(), 0) + let result = self.parse_utf8_ptr( + &mut |byte, _| { + if byte as u64 != current_offset { + current_offset = byte as u64; + if let Err(e) = input.seek(io::SeekFrom::Start(current_offset)) { + error = Some(e); + return (ptr::null(), 0); + } } - } - match input.read(&mut buffer) { - Err(e) => { - error = Some(e); - (ptr::null(), 0) - }, - Ok(length) => { - (buffer.as_ptr(), length) + match input.read(&mut buffer) { + Err(e) => { + error = Some(e); + (ptr::null(), 0) + } + Ok(length) => (buffer.as_ptr(), length), } - } - }, old_tree); + }, + old_tree, + ); match error { Some(e) => Err(e), - None => Ok(result) + None => Ok(result), } } @@ -185,7 +215,7 @@ impl Parser { input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read (*const u8, usize)> ( + unsafe extern "C" fn read (*const u8, usize)>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, @@ -224,10 +254,13 @@ impl Parser { bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let (ptr, length) = (*input)(byte_offset, Point { - row: position.row, - column: position.column / 2, - }); + let (ptr, length) = (*input)( + byte_offset, + Point { + row: position.row, + column: position.column / 2, + }, + ); *bytes_read = length as u32 * 2; ptr as *const c_char }; @@ -281,24 +314,6 @@ impl Tree { unsafe impl Send for Tree {} -impl Language { - pub fn node_kind_count(&self) -> usize { - unsafe { ffi::ts_language_symbol_count(self.0) as usize } - } - - pub fn node_kind_for_id(&self, id: u16) -> &'static str { - unsafe { 
CStr::from_ptr(ffi::ts_language_symbol_name(self.0, id)) }.to_str().unwrap() - } - - pub fn node_kind_is_named(&self, id: u16) -> bool { - unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } - } -} - -unsafe impl Send for Language {} - -unsafe impl Sync for Language {} - impl fmt::Debug for Tree { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{{Tree {:?}}}", self.root_node()) @@ -331,7 +346,9 @@ impl<'tree> Node<'tree> { } pub fn kind(&self) -> &'static str { - unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }.to_str().unwrap() + unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } + .to_str() + .unwrap() } pub fn is_named(&self) -> bool { @@ -407,10 +424,15 @@ impl<'tree> Node<'tree> { } pub fn to_sexp(&self) -> String { - extern "C" { fn free(pointer: *mut c_void); } + extern "C" { + fn free(pointer: *mut c_void); + } let c_string = unsafe { ffi::ts_node_string(self.0) }; - let result = unsafe { CStr::from_ptr(c_string) }.to_str().unwrap().to_string(); + let result = unsafe { CStr::from_ptr(c_string) } + .to_str() + .unwrap() + .to_string(); unsafe { free(c_string as *mut c_void) }; result } @@ -428,7 +450,13 @@ impl<'a> PartialEq for Node<'a> { impl<'a> fmt::Debug for Node<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "{{Node {} {} - {}}}", self.kind(), self.start_position(), self.end_position()) + write!( + f, + "{{Node {} {} - {}}}", + self.kind(), + self.start_position(), + self.end_position() + ) } } @@ -500,21 +528,30 @@ impl From for Point { #[cfg(test)] mod tests { - use std::thread; use super::*; + use std::thread; - fn rust() -> Language { unsafe { tree_sitter_rust() } } - extern "C" { fn tree_sitter_rust() -> Language; } + fn rust() -> Language { + unsafe { tree_sitter_rust() } + } + extern "C" { + fn tree_sitter_rust() -> Language; + } #[test] fn test_basic_parsing() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - 
let tree = parser.parse_str(" + let tree = parser + .parse_str( + " struct Stuff {} fn main() {} - ", None).unwrap(); + ", + None, + ) + .unwrap(); let root_node = tree.root_node(); assert_eq!(root_node.kind(), "source_file"); @@ -538,12 +575,20 @@ mod tests { messages.push((log_type, message.to_string())); }))); - parser.parse_str(" + parser + .parse_str( + " struct Stuff {} fn main() {} - ", None).unwrap(); + ", + None, + ) + .unwrap(); - assert!(messages.contains(&(LogType::Parse, "reduce sym:struct_item, child_count:3".to_string()))); + assert!(messages.contains(&( + LogType::Parse, + "reduce sym:struct_item, child_count:3".to_string() + ))); assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); } @@ -552,12 +597,17 @@ mod tests { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let tree = parser.parse_str(" + let tree = parser + .parse_str( + " struct Stuff { a: A; b: Option, } - ", None).unwrap(); + ", + None, + ) + .unwrap(); let mut cursor = tree.walk(); assert_eq!(cursor.node().kind(), "source_file"); @@ -583,25 +633,26 @@ mod tests { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let lines = &[ - "pub fn foo() {", - " 1", - "}", - ]; + let lines = &["pub fn foo() {", " 1", "}"]; - let tree = parser.parse_utf8(&mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; - if row < lines.len() { - if column < lines[row].as_bytes().len() { - &lines[row].as_bytes()[column..] - } else { - "\n".as_bytes() - } - } else { - &[] - } - }, None).unwrap(); + let tree = parser + .parse_utf8( + &mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] 
+ } else { + "\n".as_bytes() + } + } else { + &[] + } + }, + None, + ) + .unwrap(); let root = tree.root_node(); assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); @@ -619,25 +670,29 @@ mod tests { println!("log: {:?} {}", t, message); }))); - let lines: Vec> = [ - "pub fn foo() {", - " 1", - "}" - ].iter().map(|s| s.encode_utf16().collect()).collect(); + let lines: Vec> = ["pub fn foo() {", " 1", "}"] + .iter() + .map(|s| s.encode_utf16().collect()) + .collect(); - let tree = parser.parse_utf16(&mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; - if row < lines.len() { - if column < lines[row].len() { - &lines[row][column..] - } else { - &[10] - } - } else { - &[] - } - }, None).unwrap(); + let tree = parser + .parse_utf16( + &mut |_, position| { + let row = position.row as usize; + let column = position.column as usize; + if row < lines.len() { + if column < lines[row].len() { + &lines[row][column..] 
+ } else { + &[10] + } + } else { + &[] + } + }, + None, + ) + .unwrap(); let root = tree.root_node(); assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); @@ -666,20 +721,28 @@ mod tests { let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); let mut input_bytes_read = Vec::new(); - let mut tree = parser.parse_utf8(&mut |offset, _| { - let offset = offset as usize; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, None).unwrap(); + let mut tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + None, + ) + .unwrap(); - let parameters_sexp = tree.root_node() - .named_child(0).unwrap() - .named_child(1).unwrap() + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() .to_sexp(); assert_eq!( parameters_sexp, @@ -688,7 +751,7 @@ mod tests { input_bytes_read.clear(); input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); - tree.edit(&InputEdit{ + tree.edit(&InputEdit { start_byte: 14, old_end_byte: 14, new_end_byte: 20, @@ -697,20 +760,28 @@ mod tests { new_end_position: Point::new(0, 20), }); - let tree = parser.parse_utf8(&mut |offset, _| { - let offset = offset as usize; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, Some(&tree)).unwrap(); + let tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset as usize; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + Some(&tree), + ) + .unwrap(); - let 
parameters_sexp = tree.root_node() - .named_child(0).unwrap() - .named_child(1).unwrap() + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() .to_sexp(); assert_eq!( parameters_sexp, @@ -738,7 +809,6 @@ mod tests { for thread_id in 1..5 { let mut tree_clone = tree.clone(); parse_threads.push(thread::spawn(move || { - // For each thread, prepend a different number of declarations to the // source code. let mut prepend_line_count = 0; @@ -748,7 +818,7 @@ mod tests { prepended_source += "struct X {}\n\n"; } - tree_clone.edit(&InputEdit{ + tree_clone.edit(&InputEdit { start_byte: 0, old_end_byte: 0, new_end_byte: prepended_source.len() as u32, @@ -761,7 +831,9 @@ mod tests { // Reparse using the old tree as a starting point. let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - parser.parse_str(&prepended_source, Some(&tree_clone)).unwrap() + parser + .parse_str(&prepended_source, Some(&tree_clone)) + .unwrap() })); } From db360b73fb33d5c03a226b42b1bfa60398645873 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 13 Oct 2018 14:09:36 -0700 Subject: [PATCH 040/208] Add Tree.walk_with_properties --- Cargo.toml | 5 + src/lib.rs | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 292 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9adbcfd1..485d369e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,5 +20,10 @@ include = [ "/vendor/tree-sitter/src/runtime/*", ] +[dependencies] +serde = "1.0" +serde_json = "1.0" +serde_derive = "1.0" + [build-dependencies] cc = "1.0" diff --git a/src/lib.rs b/src/lib.rs index 4a132a3f..19b9a670 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,17 @@ mod ffi; +#[macro_use] +extern crate serde_derive; +extern crate serde_json; + +use std::collections::HashMap; use std::ffi::CStr; use std::fmt; use std::io::{self, Read, Seek}; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +use std::str; 
#[derive(Clone, Copy)] #[repr(transparent)] @@ -19,7 +25,7 @@ pub enum LogType { type Logger<'a> = Box; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Point { pub row: u32, pub column: u32, @@ -35,6 +41,22 @@ pub struct InputEdit { pub new_end_position: Point, } +struct PropertyTransition { + state_id: u32, + child_index: Option, +} + +struct PropertyState { + transitions: HashMap>, + property_set_id: u32, + default_next_state_id: u32, +} + +pub struct PropertySheet { + states: Vec, + property_sets: Vec>, +} + pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); pub struct Parser(*mut ffi::TSParser); @@ -43,6 +65,13 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +pub struct TreePropertyCursor<'a> { + cursor: TreeCursor<'a>, + state_stack: Vec, + child_index_stack: Vec, + property_sheet: &'a PropertySheet, +} + impl Language { pub fn node_kind_count(&self) -> usize { unsafe { ffi::ts_language_symbol_count(self.0) as usize } @@ -310,6 +339,13 @@ impl Tree { pub fn walk(&self) -> TreeCursor { self.root_node().walk() } + + pub fn walk_with_properties<'a>( + &'a self, + property_sheet: &'a PropertySheet, + ) -> TreePropertyCursor<'a> { + TreePropertyCursor::new(self, property_sheet) + } } unsafe impl Send for Tree {} @@ -437,6 +473,14 @@ impl<'tree> Node<'tree> { result } + pub fn utf8_text<'a>(&self, source: &'a str) -> Result<&'a str, str::Utf8Error> { + str::from_utf8(&source.as_bytes()[self.start_byte() as usize..self.end_byte() as usize]) + } + + pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { + &source[self.start_byte() as usize..self.end_byte() as usize] + } + pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } @@ -461,7 +505,7 @@ impl<'a> fmt::Debug for Node<'a> { } impl<'a> TreeCursor<'a> { - pub fn node(&'a self) -> Node<'a> { + pub fn node(&self) -> 
Node<'a> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, @@ -496,6 +540,87 @@ impl<'a> Drop for TreeCursor<'a> { } } +impl<'a> TreePropertyCursor<'a> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet) -> Self { + Self { + cursor: tree.root_node().walk(), + child_index_stack: vec![0], + state_stack: vec![0], + property_sheet, + } + } + + pub fn node(&self) -> Node<'a> { + self.cursor.node() + } + + pub fn node_properties(&self) -> &'a HashMap { + &self.property_sheet.property_sets[self.current_state().property_set_id as usize] + } + + pub fn goto_first_child(&mut self) -> bool { + if self.cursor.goto_first_child() { + let child_index = 0; + let next_state_id = { + let state = &self.current_state(); + let kind_id = self.cursor.node().kind_id(); + self.next_state(state, kind_id, child_index) + }; + self.state_stack.push(next_state_id); + self.child_index_stack.push(child_index); + true + } else { + false + } + } + + pub fn goto_next_sibling(&mut self) -> bool { + if self.cursor.goto_next_sibling() { + let child_index = self.child_index_stack.pop().unwrap() + 1; + self.state_stack.pop(); + let next_state_id = { + let state = &self.current_state(); + let kind_id = self.cursor.node().kind_id(); + self.next_state(state, kind_id, child_index) + }; + self.state_stack.push(next_state_id); + self.child_index_stack.push(child_index); + true + } else { + false + } + } + + pub fn goto_parent(&mut self) -> bool { + if self.cursor.goto_parent() { + self.state_stack.pop(); + self.child_index_stack.pop(); + true + } else { + false + } + } + + fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: u32) -> u32 { + state + .transitions + .get(&node_kind_id) + .and_then(|transitions| { + for transition in transitions.iter() { + if transition.child_index == Some(node_child_index) || transition.child_index == None { + return Some(transition.state_id); + } + } + None + }) + .unwrap_or(state.default_next_state_id) + } + + fn 
current_state(&self) -> &PropertyState { + &self.property_sheet.states[*self.state_stack.last().unwrap() as usize] + } +} + impl Point { pub fn new(row: u32, column: u32) -> Self { Point { row, column } @@ -526,6 +651,64 @@ impl From for Point { } } +impl PropertySheet { + pub fn new(language: Language, json: &str) -> Result { + #[derive(Deserialize, Debug)] + struct PropertyTransitionJSON { + #[serde(rename = "type")] + kind: String, + named: bool, + index: Option, + state_id: u32, + } + + #[derive(Deserialize, Debug)] + struct PropertyStateJSON { + transitions: Vec, + property_set_id: u32, + default_next_state_id: u32, + } + + #[derive(Deserialize, Debug)] + struct PropertySheetJSON { + states: Vec, + property_sets: Vec>, + } + + let input: PropertySheetJSON = serde_json::from_str(json)?; + Ok(PropertySheet { + property_sets: input.property_sets, + states: input + .states + .iter() + .map(|state| { + let mut transitions = HashMap::new(); + let node_kind_count = language.node_kind_count(); + for transition in state.transitions.iter() { + for i in 0..node_kind_count { + let i = i as u16; + if language.node_kind_is_named(i) == transition.named + && transition.kind == language.node_kind_for_id(i) + { + let entry = transitions.entry(i).or_insert(Vec::new()); + entry.push(PropertyTransition { + child_index: transition.index, + state_id: transition.state_id, + }); + } + } + } + PropertyState { + transitions, + default_next_state_id: state.default_next_state_id, + property_set_id: state.property_set_id, + } + }) + .collect(), + }) + } +} + #[cfg(test)] mod tests { use super::*; @@ -600,11 +783,11 @@ mod tests { let tree = parser .parse_str( " - struct Stuff { - a: A; - b: Option, - } - ", + struct Stuff { + a: A; + b: Option, + } + ", None, ) .unwrap(); @@ -628,6 +811,103 @@ mod tests { assert_eq!(cursor.node().is_named(), true); } + #[test] + fn test_tree_property_matching() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = 
parser.parse_str("fn f1() { f2(); }", None).unwrap(); + + let property_sheet = PropertySheet::new( + rust(), + r##" + { + "states": [ + { + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1}, + {"type": "function_item", "named": true, "state_id": 2} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 4} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"reference": "function"}, + {"define": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "function_item"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "fn"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties()["define"], "function"); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "parameters"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "("); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), ")"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_parent()); + 
assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), HashMap::new()); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties()["reference"], "function"); + } + #[test] fn test_custom_utf8_input() { let mut parser = Parser::new(); From afe722358236dfb1389471a1037531b7c5422d0f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 20 Nov 2018 15:56:16 -0800 Subject: [PATCH 041/208] Upgrade Tree-sitter, use single source file in build script --- build.rs | 25 ++----------------------- vendor/tree-sitter | 2 +- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/build.rs b/build.rs index 7d9ee83e..add3bec7 100644 --- a/build.rs +++ b/build.rs @@ -13,29 +13,8 @@ fn main() { .flag_if_supported("-Wno-unused-parameter") .include(root_path.join("src")) .include(root_path.join("include")) - .include(root_path.join("externals").join("utf8proc")); - - let source_filenames = [ - "get_changed_ranges.c", - "language.c", - "lexer.c", - "node.c", - "parser.c", - "stack.c", - "subtree.c", - "tree_cursor.c", - "tree.c", - "utf16.c", - ]; - - config.files(source_filenames.iter().map(|source_filename| { - root_path - .join("src") - .join("runtime") - .join(&source_filename) - })); - - config.file(root_path.join("externals").join("utf8proc").join("utf8proc.c")); + .include(root_path.join("externals").join("utf8proc")) + .file(root_path.join("src").join("runtime").join("runtime.c")); if env::var("RUST_TREE_SITTER_TEST").is_ok() { let parser_dir: PathBuf = ["fixtures", "tree-sitter-rust", "src"].iter().collect(); diff --git a/vendor/tree-sitter b/vendor/tree-sitter index 16376c43..6b8e5bd1 160000 --- a/vendor/tree-sitter +++ b/vendor/tree-sitter @@ -1 
+1 @@ -Subproject commit 16376c43f5cc75bbc5297e6d5716bd94d55ccc05 +Subproject commit 6b8e5bd1f96ab63f17873ef9f7a72569a421810f From 8fdcf84ff3396e4c8fc8ee4cdc9e37ebe9f126cf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 20 Nov 2018 16:00:45 -0800 Subject: [PATCH 042/208] 0.3.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 485d369e..2c92acc5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.1" +version = "0.3.2" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From a741265ead8dc67de991046d295e2f316681cce0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Nov 2018 17:26:16 -0800 Subject: [PATCH 043/208] Replace all u32s in the API with usizes Co-Authored-By: Timothy Clem --- src/lib.rs | 146 +++++++++++++++++++++++++++-------------------------- 1 file changed, 75 insertions(+), 71 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19b9a670..fa3d970e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,29 +27,36 @@ type Logger<'a> = Box; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Point { - pub row: u32, - pub column: u32, + pub row: usize, + pub column: usize, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { - pub start_byte: u32, - pub old_end_byte: u32, - pub new_end_byte: u32, + pub start_byte: usize, + pub old_end_byte: usize, + pub new_end_byte: usize, pub start_position: Point, pub old_end_position: Point, pub new_end_position: Point, } struct PropertyTransition { - state_id: u32, - child_index: Option, + state_id: usize, + child_index: Option, + text_regex: Option, } struct PropertyState { transitions: HashMap>, - property_set_id: u32, - default_next_state_id: u32, + property_set_id: usize, + default_next_state_id: usize, +} + +#[derive(Debug)] +pub enum PropertySheetError { + 
InvalidJSON(serde_json::Error), + InvalidRegex(regex::Error) } pub struct PropertySheet { @@ -67,9 +74,10 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); pub struct TreePropertyCursor<'a> { cursor: TreeCursor<'a>, - state_stack: Vec, - child_index_stack: Vec, + state_stack: Vec, + child_index_stack: Vec, property_sheet: &'a PropertySheet, + source: &'a str, } impl Language { @@ -165,10 +173,10 @@ impl Parser { pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { let bytes = input.as_bytes(); - self.parse_utf8(&mut |offset, _| &bytes[(offset as usize)..], old_tree) + self.parse_utf8(&mut |offset, _| &bytes[offset..], old_tree) } - pub fn parse_utf8<'a, T: FnMut(u32, Point) -> &'a [u8]>( + pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>( &mut self, input: &mut T, old_tree: Option<&Tree>, @@ -182,7 +190,7 @@ impl Parser { ) } - pub fn parse_utf16<'a, T: 'a + FnMut(u32, Point) -> &'a [u16]>( + pub fn parse_utf16<'a, T: 'a + FnMut(usize, Point) -> &'a [u16]>( &mut self, input: &mut T, old_tree: Option<&Tree>, @@ -239,19 +247,19 @@ impl Parser { unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } } - fn parse_utf8_ptr (*const u8, usize)>( + fn parse_utf8_ptr (*const u8, usize)>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read (*const u8, usize)>( + unsafe extern "C" fn read (*const u8, usize)>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); - let (ptr, length) = (*input)(byte_offset, position.into()); + let (ptr, length) = (*input)(byte_offset as usize, position.into()); *bytes_read = length as u32; return ptr as *const c_char; }; @@ -271,12 +279,12 @@ impl Parser { } } - fn parse_utf16_ptr (*const u16, usize)>( + fn parse_utf16_ptr (*const u16, usize)>( &mut self, input: &mut T, old_tree: Option<&Tree>, ) -> Option { - unsafe extern "C" fn read (*const 
u16, usize)>( + unsafe extern "C" fn read (*const u16, usize)>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, @@ -284,10 +292,10 @@ impl Parser { ) -> *const c_char { let input = (payload as *mut T).as_mut().unwrap(); let (ptr, length) = (*input)( - byte_offset, + byte_offset as usize, Point { - row: position.row, - column: position.column / 2, + row: position.row as usize, + column: position.column as usize / 2, }, ); *bytes_read = length as u32 * 2; @@ -326,9 +334,9 @@ impl Tree { pub fn edit(&mut self, edit: &InputEdit) { let edit = ffi::TSInputEdit { - start_byte: edit.start_byte, - old_end_byte: edit.old_end_byte, - new_end_byte: edit.new_end_byte, + start_byte: edit.start_byte as u32, + old_end_byte: edit.old_end_byte as u32, + new_end_byte: edit.new_end_byte as u32, start_point: edit.start_position.into(), old_end_point: edit.old_end_position.into(), new_end_point: edit.new_end_position.into(), @@ -399,44 +407,38 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_has_error(self.0) } } - pub fn start_byte(&self) -> u32 { - unsafe { ffi::ts_node_start_byte(self.0) } + pub fn start_byte(&self) -> usize { + unsafe { ffi::ts_node_start_byte(self.0) as usize } } - pub fn end_byte(&self) -> u32 { - unsafe { ffi::ts_node_end_byte(self.0) } + pub fn end_byte(&self) -> usize { + unsafe { ffi::ts_node_end_byte(self.0) as usize } } pub fn start_position(&self) -> Point { let result = unsafe { ffi::ts_node_start_point(self.0) }; - Point { - row: result.row, - column: result.column, - } + result.into() } pub fn end_position(&self) -> Point { let result = unsafe { ffi::ts_node_end_point(self.0) }; - Point { - row: result.row, - column: result.column, - } + result.into() } - pub fn child(&self, i: u32) -> Option { - Self::new(unsafe { ffi::ts_node_child(self.0, i) }) + pub fn child(&self, i: usize) -> Option { + Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } - pub fn child_count(&self) -> u32 { - unsafe { ffi::ts_node_child_count(self.0) } + 
pub fn child_count(&self) -> usize { + unsafe { ffi::ts_node_child_count(self.0) as usize } } - pub fn named_child<'a>(&'a self, i: u32) -> Option { - Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) + pub fn named_child<'a>(&'a self, i: usize) -> Option { + Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) } - pub fn named_child_count(&self) -> u32 { - unsafe { ffi::ts_node_named_child_count(self.0) } + pub fn named_child_count(&self) -> usize { + unsafe { ffi::ts_node_named_child_count(self.0) as usize } } pub fn parent(&self) -> Option { @@ -474,11 +476,11 @@ impl<'tree> Node<'tree> { } pub fn utf8_text<'a>(&self, source: &'a str) -> Result<&'a str, str::Utf8Error> { - str::from_utf8(&source.as_bytes()[self.start_byte() as usize..self.end_byte() as usize]) + str::from_utf8(&source.as_bytes()[self.start_byte()..self.end_byte()]) } pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { - &source[self.start_byte() as usize..self.end_byte() as usize] + &source[self.start_byte()..self.end_byte()] } pub fn walk(&self) -> TreeCursor<'tree> { @@ -524,12 +526,12 @@ impl<'a> TreeCursor<'a> { return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; } - pub fn goto_first_child_for_index(&mut self, index: u32) -> Option { - let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index) }; + pub fn goto_first_child_for_index(&mut self, index: usize) -> Option { + let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; if result < 0 { None } else { - Some(result as u32) + Some(result as usize) } } } @@ -541,12 +543,13 @@ impl<'a> Drop for TreeCursor<'a> { } impl<'a> TreePropertyCursor<'a> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet) -> Self { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet, source: &'a str) -> Self { Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], state_stack: vec![0], property_sheet, + source, } } @@ 
-555,7 +558,7 @@ impl<'a> TreePropertyCursor<'a> { } pub fn node_properties(&self) -> &'a HashMap { - &self.property_sheet.property_sets[self.current_state().property_set_id as usize] + &self.property_sheet.property_sets[self.current_state().property_set_id] } pub fn goto_first_child(&mut self) -> bool { @@ -601,7 +604,7 @@ impl<'a> TreePropertyCursor<'a> { } } - fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: u32) -> u32 { + fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: usize) -> usize { state .transitions .get(&node_kind_id) @@ -617,12 +620,12 @@ impl<'a> TreePropertyCursor<'a> { } fn current_state(&self) -> &PropertyState { - &self.property_sheet.states[*self.state_stack.last().unwrap() as usize] + &self.property_sheet.states[*self.state_stack.last().unwrap()] } } impl Point { - pub fn new(row: u32, column: u32) -> Self { + pub fn new(row: usize, column: usize) -> Self { Point { row, column } } } @@ -636,8 +639,8 @@ impl fmt::Display for Point { impl Into for Point { fn into(self) -> ffi::TSPoint { ffi::TSPoint { - row: self.row, - column: self.column, + row: self.row as u32, + column: self.column as u32, } } } @@ -645,28 +648,29 @@ impl Into for Point { impl From for Point { fn from(point: ffi::TSPoint) -> Self { Self { - row: point.row, - column: point.column, + row: point.row as usize, + column: point.column as usize, } } } impl PropertySheet { - pub fn new(language: Language, json: &str) -> Result { + pub fn new(language: Language, json: &str) -> Result { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] kind: String, named: bool, - index: Option, - state_id: u32, + index: Option, + text: Option, + state_id: usize, } #[derive(Deserialize, Debug)] struct PropertyStateJSON { transitions: Vec, - property_set_id: u32, - default_next_state_id: u32, + property_set_id: usize, + default_next_state_id: usize, } #[derive(Deserialize, Debug)] @@ -918,8 +922,8 @@ 
mod tests { let tree = parser .parse_utf8( &mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; + let row = position.row; + let column = position.column; if row < lines.len() { if column < lines[row].as_bytes().len() { &lines[row].as_bytes()[column..] @@ -958,8 +962,8 @@ mod tests { let tree = parser .parse_utf16( &mut |_, position| { - let row = position.row as usize; - let column = position.column as usize; + let row = position.row; + let column = position.column; if row < lines.len() { if column < lines[row].len() { &lines[row][column..] @@ -1004,7 +1008,7 @@ mod tests { let mut tree = parser .parse_utf8( &mut |offset, _| { - let offset = offset as usize; + let offset = offset; if offset < input_bytes.len() { let result = &input_bytes[offset..offset + 1]; input_bytes_read.extend(result.iter()); @@ -1043,7 +1047,7 @@ mod tests { let tree = parser .parse_utf8( &mut |offset, _| { - let offset = offset as usize; + let offset = offset; if offset < input_bytes.len() { let result = &input_bytes[offset..offset + 1]; input_bytes_read.extend(result.iter()); @@ -1101,7 +1105,7 @@ mod tests { tree_clone.edit(&InputEdit { start_byte: 0, old_end_byte: 0, - new_end_byte: prepended_source.len() as u32, + new_end_byte: prepended_source.len(), start_position: Point::new(0, 0), old_end_position: Point::new(0, 0), new_end_position: Point::new(prepend_line_count, 0), From d5b53cde7dded6ebbc0d78ed131e9a10f2a62c5b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Nov 2018 17:26:48 -0800 Subject: [PATCH 044/208] Respect the `:text` pseudo-class in TreePropertyCursor Co-Authored-By: Timothy Clem --- Cargo.toml | 1 + src/lib.rs | 87 ++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2c92acc5..0ffee772 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ include = [ ] [dependencies] +regex = "1" serde = "1.0" serde_json = "1.0" 
serde_derive = "1.0" diff --git a/src/lib.rs b/src/lib.rs index fa3d970e..a76ed115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,9 @@ mod ffi; #[macro_use] extern crate serde_derive; extern crate serde_json; +extern crate regex; +use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; use std::fmt; @@ -351,8 +353,9 @@ impl Tree { pub fn walk_with_properties<'a>( &'a self, property_sheet: &'a PropertySheet, + source: &'a str, ) -> TreePropertyCursor<'a> { - TreePropertyCursor::new(self, property_sheet) + TreePropertyCursor::new(self, property_sheet, source) } } @@ -610,9 +613,23 @@ impl<'a> TreePropertyCursor<'a> { .get(&node_kind_id) .and_then(|transitions| { for transition in transitions.iter() { - if transition.child_index == Some(node_child_index) || transition.child_index == None { - return Some(transition.state_id); + if let Some(text_regex) = transition.text_regex.as_ref() { + let node = self.cursor.node(); + let text = &self.source.as_bytes()[node.start_byte()..node.end_byte()]; + if let Ok(text) = str::from_utf8(text) { + if !text_regex.is_match(text) { + continue; + } + } } + + if let Some(child_index) = transition.child_index { + if child_index != node_child_index { + continue; + } + } + + return Some(transition.state_id); } None }) @@ -679,36 +696,42 @@ impl PropertySheet { property_sets: Vec>, } - let input: PropertySheetJSON = serde_json::from_str(json)?; + let input: PropertySheetJSON = serde_json::from_str(json) + .map_err(|e| PropertySheetError::InvalidJSON(e))?; + let mut states = Vec::new(); + + for state in input.states.iter() { + let mut transitions = HashMap::new(); + let node_kind_count = language.node_kind_count(); + for transition in state.transitions.iter() { + for i in 0..node_kind_count { + let i = i as u16; + if language.node_kind_is_named(i) == transition.named + && transition.kind == language.node_kind_for_id(i) + { + let entry = transitions.entry(i).or_insert(Vec::new()); + let text_regex = if let Some(text) = 
transition.text.as_ref() { + Some(Regex::new(&text).map_err(|e| PropertySheetError::InvalidRegex(e))?) + } else { + None + }; + entry.push(PropertyTransition { + child_index: transition.index, + state_id: transition.state_id, + text_regex + }); + } + } + } + states.push(PropertyState { + transitions, + default_next_state_id: state.default_next_state_id, + property_set_id: state.property_set_id, + }); + } Ok(PropertySheet { property_sets: input.property_sets, - states: input - .states - .iter() - .map(|state| { - let mut transitions = HashMap::new(); - let node_kind_count = language.node_kind_count(); - for transition in state.transitions.iter() { - for i in 0..node_kind_count { - let i = i as u16; - if language.node_kind_is_named(i) == transition.named - && transition.kind == language.node_kind_for_id(i) - { - let entry = transitions.entry(i).or_insert(Vec::new()); - entry.push(PropertyTransition { - child_index: transition.index, - state_id: transition.state_id, - }); - } - } - } - PropertyState { - transitions, - default_next_state_id: state.default_next_state_id, - property_set_id: state.property_set_id, - } - }) - .collect(), + states, }) } } @@ -869,7 +892,7 @@ mod tests { ) .unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet); + let mut cursor = tree.walk_with_properties(&property_sheet, ""); assert_eq!(cursor.node().kind(), "source_file"); assert_eq!(*cursor.node_properties(), HashMap::new()); From c9ce314695a5bad674aed9b267b9c430411bb731 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 29 Nov 2018 16:21:01 -0800 Subject: [PATCH 045/208] Make PropertySheet generic on the properties type Co-Authored-By: Timothy Clem --- src/lib.rs | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a76ed115..68715879 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,9 @@ mod ffi; extern crate serde_derive; extern crate serde_json; extern crate regex; +extern crate 
serde; +use serde::Deserialize; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; @@ -61,9 +63,10 @@ pub enum PropertySheetError { InvalidRegex(regex::Error) } -pub struct PropertySheet { +pub struct PropertySheet<'d, P: Deserialize<'d>> { states: Vec, - property_sets: Vec>, + property_sets: Vec

, + _phantom: &'d std::marker::PhantomData<()>, } pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -74,11 +77,11 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a> { +pub struct TreePropertyCursor<'a, 'd, P: Deserialize<'d>> { cursor: TreeCursor<'a>, state_stack: Vec, child_index_stack: Vec, - property_sheet: &'a PropertySheet, + property_sheet: &'a PropertySheet<'d, P>, source: &'a str, } @@ -350,11 +353,11 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a>( + pub fn walk_with_properties<'a, 'd, P: Deserialize<'d>>( &'a self, - property_sheet: &'a PropertySheet, + property_sheet: &'a PropertySheet<'d, P>, source: &'a str, - ) -> TreePropertyCursor<'a> { + ) -> TreePropertyCursor<'a, 'd, P> { TreePropertyCursor::new(self, property_sheet, source) } } @@ -545,8 +548,8 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a> TreePropertyCursor<'a> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet, source: &'a str) -> Self { +impl<'a, 'd, P: Deserialize<'d>> TreePropertyCursor<'a, 'd, P> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<'d, P>, source: &'a str) -> Self { Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], @@ -560,7 +563,7 @@ impl<'a> TreePropertyCursor<'a> { self.cursor.node() } - pub fn node_properties(&self) -> &'a HashMap { + pub fn node_properties(&self) -> &'a P { &self.property_sheet.property_sets[self.current_state().property_set_id] } @@ -671,8 +674,8 @@ impl From for Point { } } -impl PropertySheet { - pub fn new(language: Language, json: &str) -> Result { +impl<'a, P: Deserialize<'a>> PropertySheet<'a, P> { + pub fn new(language: Language, json: &'a str) -> Result { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] @@ -691,12 +694,12 @@ impl PropertySheet { } #[derive(Deserialize, Debug)] - struct PropertySheetJSON { + struct PropertySheetJSON

{ states: Vec, - property_sets: Vec>, + property_sets: Vec

, } - let input: PropertySheetJSON = serde_json::from_str(json) + let input: PropertySheetJSON

= serde_json::from_str(json) .map_err(|e| PropertySheetError::InvalidJSON(e))?; let mut states = Vec::new(); @@ -729,9 +732,10 @@ impl PropertySheet { property_set_id: state.property_set_id, }); } - Ok(PropertySheet { + Ok(Self { property_sets: input.property_sets, states, + _phantom: &std::marker::PhantomData, }) } } @@ -844,7 +848,7 @@ mod tests { parser.set_language(rust()).unwrap(); let tree = parser.parse_str("fn f1() { f2(); }", None).unwrap(); - let property_sheet = PropertySheet::new( + let property_sheet = PropertySheet::>::new( rust(), r##" { From 11610e1df66214a1bf58bff2565b52d270bf0d5b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 29 Nov 2018 20:51:50 -0800 Subject: [PATCH 046/208] Eliminate deserializer lifetime on PropertySheet The PropertySheet is intended to be a long-lived object, whereas its JSON source is not needed once the property sheet is instantiated. Co-Authored-By: Timothy Clem --- src/lib.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 68715879..681af7fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,7 @@ extern crate serde_json; extern crate regex; extern crate serde; -use serde::Deserialize; +use serde::de::DeserializeOwned; use regex::Regex; use std::collections::HashMap; use std::ffi::CStr; @@ -63,10 +63,9 @@ pub enum PropertySheetError { InvalidRegex(regex::Error) } -pub struct PropertySheet<'d, P: Deserialize<'d>> { +pub struct PropertySheet> { states: Vec, property_sets: Vec

, - _phantom: &'d std::marker::PhantomData<()>, } pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -77,11 +76,11 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a, 'd, P: Deserialize<'d>> { +pub struct TreePropertyCursor<'a, P: 'a + DeserializeOwned> { cursor: TreeCursor<'a>, state_stack: Vec, child_index_stack: Vec, - property_sheet: &'a PropertySheet<'d, P>, + property_sheet: &'a PropertySheet

, source: &'a str, } @@ -353,11 +352,11 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a, 'd, P: Deserialize<'d>>( + pub fn walk_with_properties<'a, P: DeserializeOwned>( &'a self, - property_sheet: &'a PropertySheet<'d, P>, + property_sheet: &'a PropertySheet

, source: &'a str, - ) -> TreePropertyCursor<'a, 'd, P> { + ) -> TreePropertyCursor<'a, P> { TreePropertyCursor::new(self, property_sheet, source) } } @@ -548,8 +547,8 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a, 'd, P: Deserialize<'d>> TreePropertyCursor<'a, 'd, P> { - fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<'d, P>, source: &'a str) -> Self { +impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { + fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

, source: &'a str) -> Self { Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], @@ -674,8 +673,8 @@ impl From for Point { } } -impl<'a, P: Deserialize<'a>> PropertySheet<'a, P> { - pub fn new(language: Language, json: &'a str) -> Result { +impl PropertySheet

{ + pub fn new(language: Language, json: &str) -> Result { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] @@ -735,7 +734,6 @@ impl<'a, P: Deserialize<'a>> PropertySheet<'a, P> { Ok(Self { property_sets: input.property_sets, states, - _phantom: &std::marker::PhantomData, }) } } From fbb220f19302ff44f172b6a48362ece7f62167ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 3 Dec 2018 10:43:58 -0800 Subject: [PATCH 047/208] Add test for regexes in property sheets --- src/lib.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 681af7fb..724a08bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -844,9 +844,18 @@ mod tests { fn test_tree_property_matching() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - let tree = parser.parse_str("fn f1() { f2(); }", None).unwrap(); + let source_code = "fn f1() { f2(); }"; + let tree = parser.parse_str(source_code, None).unwrap(); - let property_sheet = PropertySheet::>::new( + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + reference: Option, + define: Option, + } + + let empty_properties = Properties { reference: None, define: None }; + + let property_sheet = PropertySheet::::new( rust(), r##" { @@ -894,47 +903,126 @@ mod tests { ) .unwrap(); - let mut cursor = tree.walk_with_properties(&property_sheet, ""); + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); assert_eq!(cursor.node().kind(), "source_file"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "function_item"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "fn"); - 
assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(!cursor.goto_first_child()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties()["define"], "function"); + assert_eq!(cursor.node_properties().define, Some("function".to_owned())); assert!(!cursor.goto_first_child()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "parameters"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "("); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), ")"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_parent()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "block"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "call_expression"); - assert_eq!(*cursor.node_properties(), HashMap::new()); + assert_eq!(*cursor.node_properties(), empty_properties); assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties()["reference"], "function"); + assert_eq!(cursor.node_properties().reference, Some("function".to_owned())); + } + + #[test] + fn test_tree_property_matching_with_regexes() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let source_code = "fn f1() { None(a()) }"; + let tree = parser.parse_str(source_code, None).unwrap(); + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + scope: Option, + } + + let empty_properties = Properties { scope: None }; + + let property_sheet = 
PropertySheet::::new( + rust(), + r##" + { + "states": [ + { + "id": 0, + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "id": 1, + "transitions": [ + {"type": "identifier", "named": true, "text": "^[A-Z]", "state_id": 2}, + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"scope": "constructor"}, + {"scope": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), empty_properties); + + cursor.goto_first_child(); + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties().scope, Some("constructor".to_owned())); } #[test] From beb60194d12b62cf70bc6b9e8652258ae07a9b44 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 3 Dec 2018 14:42:18 -0800 Subject: [PATCH 048/208] 0.3.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0ffee772..f61b1583 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter 
parsing library" -version = "0.3.2" +version = "0.3.3" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From a4c4b85a16ce0ecbb550d6de47801d2e387e629b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 Dec 2018 12:50:12 -0800 Subject: [PATCH 049/208] Initial commit --- .gitignore | 2 + Cargo.lock | 812 ++++++++++++++++++ Cargo.toml | 17 + src/build_tables/item.rs | 22 + src/build_tables/mod.rs | 34 + src/error.rs | 13 + src/generate.rs | 26 + src/grammars.rs | 98 +++ src/main.rs | 35 + src/parse_grammar.rs | 153 ++++ src/prepare_grammar/expand_repeats.rs | 220 +++++ src/prepare_grammar/extract_simple_aliases.rs | 10 + src/prepare_grammar/extract_tokens.rs | 7 + src/prepare_grammar/flatten_grammar.rs | 7 + src/prepare_grammar/intern_symbols.rs | 237 +++++ src/prepare_grammar/mod.rs | 40 + src/prepare_grammar/normalize_rules.rs | 5 + src/render/mod.rs | 16 + src/rules.rs | 205 +++++ src/tables.rs | 77 ++ 20 files changed, 2036 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/build_tables/item.rs create mode 100644 src/build_tables/mod.rs create mode 100644 src/error.rs create mode 100644 src/generate.rs create mode 100644 src/grammars.rs create mode 100644 src/main.rs create mode 100644 src/parse_grammar.rs create mode 100644 src/prepare_grammar/expand_repeats.rs create mode 100644 src/prepare_grammar/extract_simple_aliases.rs create mode 100644 src/prepare_grammar/extract_tokens.rs create mode 100644 src/prepare_grammar/flatten_grammar.rs create mode 100644 src/prepare_grammar/intern_symbols.rs create mode 100644 src/prepare_grammar/mod.rs create mode 100644 src/prepare_grammar/normalize_rules.rs create mode 100644 src/render/mod.rs create mode 100644 src/rules.rs create mode 100644 src/tables.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..53eaa219 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.lock 
b/Cargo.lock new file mode 100644 index 00000000..20908681 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,812 @@ +[[package]] +name = "aho-corasick" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "argon2rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", + "scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "arrayvec" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "atty" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace-sys" +version = 
"0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bitflags" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bitvec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "blake2-rfc" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cc" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cfg-if" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "clap" +version = "2.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "crossbeam-channel" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-utils" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "crossbeam-utils" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "dirs" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "failure" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "failure_derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fnv" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "globset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ignore" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.4.2 
(registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.44" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libloading" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", + "vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "linked-hash-map" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lock_api" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "log" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lru-cache" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memchr" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memoffset" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "nodrop" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "owning_ref" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "parking_lot_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", 
+ "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pkg-config" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "proc-macro2" +version = "0.4.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quote" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_syscall" +version = "0.1.43" 
+source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "redox_users" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rusqlite" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)", + "lru-cache 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rust-tree-sitter-cli" +version = 
"0.1.0" +dependencies = [ + "bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "same-file" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "scoped_threadpool" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "scopeguard" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_derive" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "smallvec" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "stable_deref_trait" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "strsim" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "syn" +version = "0.15.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "synstructure" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 
(registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "termion" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "textwrap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "time" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tree-sitter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ucd-util" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "vcpkg" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "walkdir" +version = "2.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-util" +version = 
"0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3f67b0b6a86dae6e67ff4ca2b6201396074996379fba2b92ff649126f37cb392" +"checksum arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f405cc4c21cd8b784f6c8fc2adf9bc00f59558f0049b5ec21517f875963040cc" +"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" +"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" +"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" +"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e37e2176261200377c7cde4c6de020394174df556c356f965e4bc239f5ce1c5a" +"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" +"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" 
+"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" +"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" +"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" +"checksum crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7b85741761b7f160bc5e7e0c14986ef685b7f8bf9b7ad081c60c604bb4649827" +"checksum crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2449aaa4ec7ef96e5fb24db16024b935df718e9ae1cec0a1e68feeca2efca7b8" +"checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" +"checksum crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c55913cc2799171a550e307918c0a360e8c16004820291bf3b638969b4a01816" +"checksum dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88972de891f6118092b643d85a0b28e0678e0f948d7f879aa32f2d5aafe97d2a" +"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" +"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" +"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" +"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +"checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865" +"checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01" +"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" +"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" +"checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" +"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" +"checksum libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d3711dfd91a1081d2458ad2d06ea30a8755256e74038be2ad927d94e1c955ca8" +"checksum linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7860ec297f7008ff7a1e3382d7f7e1dcd69efc94751a2284bafc3d013c2aa939" +"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" +"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" +"checksum lru-cache 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4d06ff7ff06f729ce5f4e227876cb88d10bc59cd4ae1e09fbb2bde15c850dc21" +"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" +"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" +"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" +"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" +"checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" +"checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c" +"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" +"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" +"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" +"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" +"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" +"checksum rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1961a422c4d189dfb50ffa9320bf1f2a9bd54ecb92792fb9477f99a1045f3372" +"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" +"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = 
"679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +"checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26" +"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" +"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" +"checksum rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c9d9118f1ce84d8d0b67f9779936432fb42bb620cef2122409d786892cce9a3c" +"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" +"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" +"checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267" +"checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" +"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" +"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +"checksum semver-parser 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" +"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" +"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" +"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" +"checksum smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b73ea3738b47563803ef814925e69be00799a8c07420be8b996f8e98fb2336db" +"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" +"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" +"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" +"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" +"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" +"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" +"checksum tree-sitter 0.3.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "311adf1e004ac816285a1196c93ea36364857c3adc37ffc9fd5ed0d70545391a" +"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" +"checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" +"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" +"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1" +"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-util 
0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..965cc81e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "rust-tree-sitter-cli" +version = "0.1.0" +authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"] +edition = "2018" + +[dependencies] +bitvec = "0.8" +clap = "2.32" +dirs = "1.0.2" +ignore = "0.4.4" +libloading = "0.5" +rusqlite = "0.14.0" +serde = "1.0" +serde_derive = "1.0" +serde_json = "1.0" +tree-sitter = "0.3.1" diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs new file mode 100644 index 00000000..c8d30997 --- /dev/null +++ b/src/build_tables/item.rs @@ -0,0 +1,22 @@ +use crate::grammars::Production; +use std::collections::HashMap; +use bitvec::BitVec; + +#[derive(Debug, PartialEq, Eq)] +pub(super) struct LookaheadSet { + terminal_bits: BitVec, + external_bits: BitVec, + eof: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub(super) struct ParseItem { + variable_index: u32, + production_index: u32, + step_index: u32, +} + +#[derive(Debug, PartialEq, Eq)] +pub(super) struct ParseItemSet { + entries: HashMap<ParseItem, LookaheadSet> +} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs new file mode 100644 index 00000000..c5dd5b54 --- /dev/null +++ b/src/build_tables/mod.rs @@ -0,0 +1,34 @@ +mod item; + +use std::collections::{HashMap, VecDeque}; +use crate::grammars::{SyntaxGrammar, LexicalGrammar}; +use crate::tables::{ParseTable, LexTable, ParseStateId}; +use crate::rules::{AliasMap, Symbol}; +use crate::error::Result; +use self::item::ParseItemSet; + +type SymbolSequence = Vec<Symbol>; + +struct ParseStateQueueEntry { + preceding_symbols: SymbolSequence, + item_set: ParseItemSet, + state_id: ParseStateId, +} + +struct 
ParseTableBuilder<'a> { + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + simple_aliases: &'a AliasMap, + state_ids_by_item_set: HashMap<ParseItemSet, ParseStateId>, + item_sets_by_state_id: Vec<&'a ParseItemSet>, + parse_state_queue: VecDeque<ParseStateQueueEntry>, + parse_table: ParseTable, +} + +pub fn build_tables( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + simple_aliases: &AliasMap +) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> { + unimplemented!(); +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..90e7b8f9 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,13 @@ +#[derive(Debug)] +pub enum Error { + GrammarError(String), + SymbolError(String), +} + +pub type Result<T> = std::result::Result<T, Error>; + +impl From<serde_json::Error> for Error { + fn from(error: serde_json::Error) -> Self { + Error::GrammarError(error.to_string()) + } +} diff --git a/src/generate.rs b/src/generate.rs new file mode 100644 index 00000000..4507fb6f --- /dev/null +++ b/src/generate.rs @@ -0,0 +1,26 @@ +use crate::error::Result; +use crate::parse_grammar::parse_grammar; +use crate::prepare_grammar::prepare_grammar; +use crate::build_tables::build_tables; +use crate::render::render_c_code; + +pub fn generate_parser_for_grammar(input: String) -> Result<String> { + let input_grammar = parse_grammar(&input)?; + let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?; + let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( + &syntax_grammar, + &lexical_grammar, + &simple_aliases + )?; + let c_code = render_c_code( + &input_grammar.name, + parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + syntax_grammar, + lexical_grammar, + simple_aliases + ); + Ok(c_code) +} diff --git a/src/grammars.rs b/src/grammars.rs new file mode 100644 index 00000000..6f5b772e --- /dev/null +++ b/src/grammars.rs @@ -0,0 +1,98 @@ +use crate::rules::{Associativity, Alias, Rule, Symbol}; + +#[derive(Clone, Copy, Debug, 
PartialEq, Eq)] +pub enum VariableType { + Hidden, + Auxiliary, + Anonymous, + Named +} + +// Input grammar + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct InputVariable { + pub name: String, + pub kind: VariableType, + pub rule: Rule, +} + +#[derive(PartialEq, Eq)] +pub struct InputGrammar { + pub name: String, + pub variables: Vec<InputVariable>, + pub extra_tokens: Vec<Rule>, + pub expected_conflicts: Vec<Vec<String>>, + pub external_tokens: Vec<Rule>, + pub variables_to_inline: Vec<String>, + pub word_token: Option<String>, +} + +// Extracted lexical grammar + +#[derive(PartialEq, Eq)] +pub struct LexicalVariable { + name: String, + kind: VariableType, + rule: Rule, + is_string: bool, +} + +pub struct LexicalGrammar { + variables: Vec<LexicalVariable>, + separators: Vec<Rule>, +} + +// Extracted syntax grammar + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ProductionStep { + symbol: Symbol, + precedence: i32, + associativity: Option<Associativity>, + alias: Option<Alias>, + is_excluded: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Production { + steps: Vec<ProductionStep>, + dynamic_precedence: i32, +} + +#[derive(Clone, PartialEq, Eq)] +pub struct SyntaxVariable { + name: String, + kind: VariableType, +} + +#[derive(Clone, PartialEq, Eq)] +pub struct ExternalToken { + name: String, + kind: VariableType, + corresponding_internal_token: Symbol, +} + +pub struct SyntaxGrammar { + variables: Vec<SyntaxVariable>, + extra_tokens: Vec<Symbol>, + expected_conflicts: Vec<Vec<Symbol>>, + external_tokens: Vec<ExternalToken>, + variables_to_inline: Vec<Symbol>, + word_token: Symbol, +} + +#[cfg(test)] +impl InputVariable { + pub fn named(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Named, rule } + } + + pub fn auxiliary(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Auxiliary, rule } + } + + pub fn hidden(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Hidden, rule } + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 00000000..3eeb306a --- /dev/null +++ b/src/main.rs @@ 
-0,0 +1,35 @@ +use clap::{App, Arg, SubCommand}; + +#[macro_use] extern crate serde_derive; +#[macro_use] extern crate serde_json; + +mod build_tables; +mod error; +mod generate; +mod grammars; +mod parse_grammar; +mod prepare_grammar; +mod render; +mod rules; +mod tables; + +fn main() { + let matches = App::new("tree-sitter") + .version("0.1") + .author("Max Brunsfeld <maxbrunsfeld@gmail.com>") + .about("Generates and tests parsers") + .subcommand( + SubCommand::with_name("generate") + .about("Generate a parser") + ).subcommand( + SubCommand::with_name("parse") + .about("Parse a file") + .arg(Arg::with_name("path").index(1)) + ).subcommand( + SubCommand::with_name("test") + .about("Run a parser's tests") + .arg(Arg::with_name("path").index(1).required(true)) + .arg(Arg::with_name("line").index(2).required(true)) + .arg(Arg::with_name("column").index(3).required(true)) + ); +} diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs new file mode 100644 index 00000000..4c21e5ba --- /dev/null +++ b/src/parse_grammar.rs @@ -0,0 +1,153 @@ +use serde_json::{Map, Value}; +use crate::error::Result; +use crate::grammars::{InputGrammar, InputVariable, VariableType}; +use crate::rules::Rule; +use std::collections::HashMap; + +#[derive(Deserialize)] +#[serde(tag = "type")] +#[allow(non_camel_case_types)] +pub enum RuleJSON { + BLANK, + STRING { + value: String, + }, + PATTERN { + value: String, + }, + SYMBOL { + name: String, + }, + CHOICE { + members: Vec<RuleJSON>, + }, + SEQ { + members: Vec<RuleJSON>, + }, + REPEAT { + content: Box<RuleJSON>, + }, + PREC_LEFT { + value: i32, + content: Box<RuleJSON>, + }, + PREC_RIGHT { + value: i32, + content: Box<RuleJSON>, + }, + PREC { + value: i32, + content: Box<RuleJSON>, + }, + TOKEN { + content: Box<RuleJSON>, + }, + TOKEN_IMMEDIATE { + content: Box<RuleJSON>, + }, +} + +#[derive(Deserialize)] +struct GrammarJSON { + name: String, + rules: Map<String, Value>, + conflicts: Option<Vec<Vec<String>>>, + externals: Option<Vec<RuleJSON>>, + extras: Option<Vec<RuleJSON>>, + inline: Option<Vec<String>>, + word: Option<String>, +} + +pub fn parse_grammar(input: &str) -> Result<InputGrammar> { + let grammar_json: GrammarJSON = 
serde_json::from_str(&input)?; + + let mut variables = Vec::with_capacity(grammar_json.rules.len()); + for (name, value) in grammar_json.rules { + variables.push(InputVariable { + name: name.to_owned(), + kind: VariableType::Named, + rule: parse_rule(serde_json::from_value(value)?), + }) + } + + let extra_tokens = grammar_json.extras + .unwrap_or(Vec::new()) + .into_iter() + .map(parse_rule) + .collect(); + let external_tokens = grammar_json.externals + .unwrap_or(Vec::new()) + .into_iter() + .map(parse_rule) + .collect(); + let expected_conflicts = grammar_json.conflicts + .unwrap_or(Vec::new()); + let variables_to_inline = grammar_json.inline + .unwrap_or(Vec::new()); + + Ok(InputGrammar { + name: grammar_json.name, + word_token: grammar_json.word, + variables, + extra_tokens, + expected_conflicts, + external_tokens, + variables_to_inline, + }) +} + +fn parse_rule(json: RuleJSON) -> Rule { + match json { + RuleJSON::BLANK => Rule::Blank, + RuleJSON::STRING { value } => Rule::String(value), + RuleJSON::PATTERN { value } => Rule::Pattern(value), + RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), + RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), + RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), + RuleJSON::REPEAT { content } => Rule::repeat(parse_rule(*content)), + RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)), + RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)), + RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)), + RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)), + RuleJSON::TOKEN_IMMEDIATE { content } => Rule::immediate_token(parse_rule(*content)), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_grammar() { + let grammar = parse_grammar(&json!({ + "name": "my_lang", + "rules": { + "file": { + "type": "REPEAT", + "content": 
{ + "type": "SYMBOL", + "name": "statement" + } + }, + "statement": { + "type": "STRING", + "value": "foo" + } + } + }).to_string()).unwrap(); + + assert_eq!(grammar.name, "my_lang"); + assert_eq!(grammar.variables, vec![ + InputVariable { + name: "file".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())) + }, + InputVariable { + name: "statement".to_string(), + kind: VariableType::Named, + rule: Rule::String("foo".to_string()) + }, + ]); + } +} diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs new file mode 100644 index 00000000..69db150c --- /dev/null +++ b/src/prepare_grammar/expand_repeats.rs @@ -0,0 +1,220 @@ +use crate::rules::{Rule, Symbol}; +use crate::grammars::{InputVariable, VariableType}; +use std::collections::HashMap; +use std::mem; +use std::rc::Rc; +use super::ExtractedGrammar; + +struct Expander { + variable_name: String, + repeat_count_in_variable: usize, + preceding_symbol_count: usize, + auxiliary_variables: Vec, + existing_repeats: HashMap +} + +impl Expander { + fn expand_variable(&mut self, variable: &mut InputVariable) { + self.variable_name.clear(); + self.variable_name.push_str(&variable.name); + self.repeat_count_in_variable = 0; + let mut rule = Rule::Blank; + mem::swap(&mut rule, &mut variable.rule); + variable.rule = self.expand_rule(&rule); + } + + fn expand_rule(&mut self, rule: &Rule) -> Rule { + match rule { + Rule::Choice { elements } => + Rule::Choice { + elements: elements.iter().map(|element| self.expand_rule(element)).collect() + }, + + Rule::Seq { left, right } => + Rule::Seq { + left: Rc::new(self.expand_rule(left)), + right: Rc::new(self.expand_rule(right)), + }, + + Rule::Repeat(content) => { + let inner_rule = self.expand_rule(content); + + if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) { + return Rule::Symbol(*existing_symbol); + } + + self.repeat_count_in_variable += 1; + let rule_name = 
format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable); + let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len()); + let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol)); + self.existing_repeats.insert(inner_rule.clone(), repeat_symbol); + self.auxiliary_variables.push(InputVariable { + name: rule_name, + kind: VariableType::Auxiliary, + rule: Rule::Choice { + elements: vec![ + Rule::Seq { + left: rc_symbol.clone(), + right: rc_symbol + }, + inner_rule + ], + }, + }); + + Rule::Symbol(repeat_symbol) + } + + Rule::Metadata { rule, params } => Rule::Metadata { + rule: Rc::new(self.expand_rule(rule)), + params: params.clone() + }, + + _ => rule.clone() + } + } +} + +pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar { + let mut expander = Expander { + variable_name: String::new(), + repeat_count_in_variable: 0, + preceding_symbol_count: grammar.variables.len(), + auxiliary_variables: Vec::new(), + existing_repeats: HashMap::new(), + }; + + for mut variable in grammar.variables.iter_mut() { + expander.expand_variable(&mut variable); + } + + grammar.variables.extend(expander.auxiliary_variables.into_iter()); + grammar +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_repeat_expansion() { + // Repeats nested inside of sequences and choices are expanded. 
+ let grammar = expand_repeats(build_grammar(vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::choice(vec![ + Rule::repeat(Rule::terminal(11)), + Rule::repeat(Rule::terminal(12)), + ]), + Rule::terminal(13), + ])), + ])); + + assert_eq!(grammar.variables, vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + ]), + Rule::terminal(13), + ])), + InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(1), + Rule::non_terminal(1), + ]), + Rule::terminal(11), + ])), + InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ]), + Rule::terminal(12), + ])), + ]); + } + + #[test] + fn test_repeat_deduplication() { + // Terminal 4 appears inside of a repeat in three different places. + let grammar = expand_repeats(build_grammar(vec![ + InputVariable::named("rule0", Rule::choice(vec![ + Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]), + Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]), + ])), + InputVariable::named("rule1", Rule::seq(vec![ + Rule::terminal(3), + Rule::repeat(Rule::terminal(4)), + ])), + ])); + + // Only one auxiliary rule is created for repeating terminal 4. 
+ assert_eq!(grammar.variables, vec![ + InputVariable::named("rule0", Rule::choice(vec![ + Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]), + Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]), + ])), + InputVariable::named("rule1", Rule::seq(vec![ + Rule::terminal(3), + Rule::non_terminal(2), + ])), + InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ]), + Rule::terminal(4), + ])) + ]); + } + + #[test] + fn test_expansion_of_nested_repeats() { + let grammar = expand_repeats(build_grammar(vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::repeat(Rule::seq(vec![ + Rule::terminal(11), + Rule::repeat(Rule::terminal(12)) + ])), + ])), + ])); + + assert_eq!(grammar.variables, vec![ + InputVariable::named("rule0", Rule::seq(vec![ + Rule::terminal(10), + Rule::non_terminal(2), + ])), + InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(1), + Rule::non_terminal(1), + ]), + Rule::terminal(12), + ])), + InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![ + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ]), + Rule::seq(vec![ + Rule::terminal(11), + Rule::non_terminal(1), + ]), + ])), + ]); + } + + fn build_grammar(variables: Vec) -> ExtractedGrammar { + ExtractedGrammar { + variables, + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + } + } +} diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs new file mode 100644 index 00000000..250246f3 --- /dev/null +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -0,0 +1,10 @@ +use crate::rules::AliasMap; +use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use super::ExtractedGrammar; + +pub(super) fn extract_simple_aliases( + syntax_grammar: &mut SyntaxGrammar, + 
lexical_grammar: &mut LexicalGrammar +) -> AliasMap { + unimplemented!(); +} diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs new file mode 100644 index 00000000..660d3819 --- /dev/null +++ b/src/prepare_grammar/extract_tokens.rs @@ -0,0 +1,7 @@ +use crate::error::Result; +use crate::grammars::LexicalGrammar; +use super::{InternedGrammar, ExtractedGrammar}; + +pub(super) fn extract_tokens(grammar: InternedGrammar) -> Result<(ExtractedGrammar, LexicalGrammar)> { + unimplemented!(); +} diff --git a/src/prepare_grammar/flatten_grammar.rs b/src/prepare_grammar/flatten_grammar.rs new file mode 100644 index 00000000..36fe76c9 --- /dev/null +++ b/src/prepare_grammar/flatten_grammar.rs @@ -0,0 +1,7 @@ +use crate::error::Result; +use crate::grammars::SyntaxGrammar; +use super::ExtractedGrammar; + +pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result { + unimplemented!(); +} diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs new file mode 100644 index 00000000..00a5c330 --- /dev/null +++ b/src/prepare_grammar/intern_symbols.rs @@ -0,0 +1,237 @@ +use crate::error::{Error, Result}; +use crate::rules::{Rule, Symbol}; +use crate::grammars::{InputGrammar, InputVariable, VariableType}; +use std::rc::Rc; +use super::InternedGrammar; + +pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result { + let interner = Interner { grammar }; + + if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden { + return Err(Error::GrammarError("Grammar's start rule must be visible".to_string())); + } + + let mut variables = Vec::with_capacity(grammar.variables.len()); + for variable in grammar.variables.iter() { + variables.push(InputVariable { + name: variable.name.clone(), + kind: variable_type_for_name(&variable.name), + rule: interner.intern_rule(&variable.rule)?, + }); + } + + let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len()); + for external_token 
in grammar.external_tokens.iter() { + let rule = interner.intern_rule(&external_token)?; + let (name, kind) = if let Rule::NamedSymbol(name) = external_token { + (name.clone(), variable_type_for_name(&name)) + } else { + (String::new(), VariableType::Anonymous) + }; + external_tokens.push(InputVariable { name, kind, rule }); + } + + let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len()); + for extra_token in grammar.extra_tokens.iter() { + extra_tokens.push(interner.intern_rule(extra_token)?); + } + + let mut expected_conflicts = Vec::new(); + for conflict in grammar.expected_conflicts.iter() { + let mut interned_conflict = Vec::with_capacity(conflict.len()); + for name in conflict { + interned_conflict.push(interner + .intern_name(&name) + .ok_or_else(|| symbol_error(name))? + ); + } + expected_conflicts.push(interned_conflict); + } + + let mut variables_to_inline = Vec::new(); + for name in grammar.variables_to_inline.iter() { + if let Some(symbol) = interner.intern_name(&name) { + variables_to_inline.push(symbol); + } + } + + let mut word_token = None; + if let Some(name) = grammar.word_token.as_ref() { + word_token = Some(interner + .intern_name(&name) + .ok_or_else(|| symbol_error(&name))? 
+ ); + } + + Ok(InternedGrammar { + variables, + external_tokens, + extra_tokens, + expected_conflicts, + variables_to_inline, + word_token, + }) +} + +struct Interner<'a> { + grammar: &'a InputGrammar +} + +impl<'a> Interner<'a> { + fn intern_rule(&self, rule: &Rule) -> Result { + match rule { + Rule::Choice { elements } => { + let mut result = Vec::with_capacity(elements.len()); + for element in elements { + result.push(self.intern_rule(element)?); + } + Ok(Rule::Choice { elements: result }) + }, + + Rule::Seq { left, right } => + Ok(Rule::Seq { + left: Rc::new(self.intern_rule(left)?), + right: Rc::new(self.intern_rule(right)?), + }), + + Rule::Repeat(content) => + Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))), + + Rule::Metadata { rule, params } => + Ok(Rule::Metadata { + rule: Rc::new(self.intern_rule(rule)?), + params: params.clone() + }), + + Rule::NamedSymbol(name) => { + if let Some(symbol) = self.intern_name(&name) { + Ok(Rule::Symbol(symbol)) + } else { + Err(symbol_error(name)) + } + }, + + _ => Ok(rule.clone()) + + } + } + + fn intern_name(&self, symbol: &str) -> Option { + for (i, variable) in self.grammar.variables.iter().enumerate() { + if variable.name == symbol { + return Some(Symbol::non_terminal(i)) + } + } + + for (i, external_token) in self.grammar.external_tokens.iter().enumerate() { + if let Rule::NamedSymbol(name) = external_token { + if name == symbol { + return Some(Symbol::external(i)) + } + } + } + + return None + } +} + +fn symbol_error(name: &str) -> Error { + Error::SymbolError(format!("Undefined symbol '{}'", name)) +} + +fn variable_type_for_name(name: &str) -> VariableType { + if name.starts_with("_") { + VariableType::Hidden + } else { + VariableType::Named + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_repeat_expansion() { + let grammar = intern_symbols(&build_grammar(vec![ + InputVariable::named("x", Rule::choice(vec![ + Rule::named("y"), + Rule::named("_z"), + ])), + 
InputVariable::named("y", Rule::named("_z")), + InputVariable::named("_z", Rule::string("a")), + ])).unwrap(); + + assert_eq!(grammar.variables, vec![ + InputVariable::named("x", Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + ])), + InputVariable::named("y", Rule::non_terminal(2)), + InputVariable::hidden("_z", Rule::string("a")), + ]); + } + + #[test] + fn test_interning_external_token_names() { + // Variable `y` is both an internal and an external token. + // Variable `z` is just an external token. + let mut input_grammar = build_grammar(vec![ + InputVariable::named("w", Rule::choice(vec![ + Rule::named("x"), + Rule::named("y"), + Rule::named("z"), + ])), + InputVariable::named("x", Rule::string("a")), + InputVariable::named("y", Rule::string("b")), + ]); + input_grammar.external_tokens.extend(vec![ + Rule::named("y"), + Rule::named("z"), + ]); + + let grammar = intern_symbols(&input_grammar).unwrap(); + + // Variable `y` is referred to by its internal index. + // Variable `z` is referred to by its external index. + assert_eq!(grammar.variables, vec![ + InputVariable::named("w", Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + Rule::external(1), + ])), + InputVariable::named("x", Rule::string("a")), + InputVariable::named("y", Rule::string("b")), + ]); + + // The external token for `y` refers back to its internal index. 
+ assert_eq!(grammar.external_tokens, vec![ + InputVariable::named("y", Rule::non_terminal(2)), + InputVariable::named("z", Rule::external(1)), + ]); + } + + #[test] + fn test_grammar_with_undefined_symbols() { + let result = intern_symbols(&build_grammar(vec![ + InputVariable::named("x", Rule::named("y")), + ])); + + match result { + Err(Error::SymbolError(message)) => assert_eq!(message, "Undefined symbol 'y'"), + _ => panic!("Expected an error but got none"), + } + } + + fn build_grammar(variables: Vec) -> InputGrammar { + InputGrammar { + variables, + name: "the_language".to_string(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + } + } +} diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs new file mode 100644 index 00000000..0788edca --- /dev/null +++ b/src/prepare_grammar/mod.rs @@ -0,0 +1,40 @@ +mod intern_symbols; +mod extract_tokens; +mod expand_repeats; +mod flatten_grammar; +mod normalize_rules; +mod extract_simple_aliases; + +use crate::rules::{AliasMap, Rule, Symbol}; +use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, InputVariable, ExternalToken}; +use crate::error::Result; +use self::intern_symbols::intern_symbols; +use self::extract_tokens::extract_tokens; +use self::expand_repeats::expand_repeats; +use self::flatten_grammar::flatten_grammar; +use self::normalize_rules::normalize_rules; +use self::extract_simple_aliases::extract_simple_aliases; + +pub(self) struct IntermediateGrammar { + variables: Vec, + extra_tokens: Vec, + expected_conflicts: Vec>, + external_tokens: Vec, + variables_to_inline: Vec, + word_token: Option, +} + +pub(self) type InternedGrammar = IntermediateGrammar; +pub(self) type ExtractedGrammar = IntermediateGrammar; + +pub fn prepare_grammar( + input_grammar: &InputGrammar +) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> { + let interned_grammar = intern_symbols(input_grammar)?; + let 
(syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; + let syntax_grammar = expand_repeats(syntax_grammar); + let mut syntax_grammar = flatten_grammar(syntax_grammar)?; + let mut lexical_grammar = normalize_rules(lexical_grammar); + let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar); + Ok((syntax_grammar, lexical_grammar, simple_aliases)) +} diff --git a/src/prepare_grammar/normalize_rules.rs b/src/prepare_grammar/normalize_rules.rs new file mode 100644 index 00000000..9e625ef5 --- /dev/null +++ b/src/prepare_grammar/normalize_rules.rs @@ -0,0 +1,5 @@ +use crate::grammars::LexicalGrammar; + +pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar { + unimplemented!(); +} diff --git a/src/render/mod.rs b/src/render/mod.rs new file mode 100644 index 00000000..85ce1f32 --- /dev/null +++ b/src/render/mod.rs @@ -0,0 +1,16 @@ +use crate::rules::{Symbol, AliasMap}; +use crate::grammars::{SyntaxGrammar, LexicalGrammar}; +use crate::tables::{ParseTable, LexTable}; + +pub fn render_c_code( + name: &str, + parse_table: ParseTable, + main_lex_table: LexTable, + keyword_lex_table: LexTable, + keyword_capture_token: Option, + syntax_grammar: SyntaxGrammar, + lexical_grammar: LexicalGrammar, + simple_aliases: AliasMap, +) -> String { + unimplemented!(); +} diff --git a/src/rules.rs b/src/rules.rs new file mode 100644 index 00000000..3cccca0d --- /dev/null +++ b/src/rules.rs @@ -0,0 +1,205 @@ +use std::rc::Rc; +use std::collections::HashMap; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum SymbolType { + External, + Terminal, + NonTerminal, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Associativity { + Left, + Right +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Alias { + value: String, + is_named: bool, +} + +pub type AliasMap = HashMap; + +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +pub struct MetadataParams { + precedence: Option, + 
dynamic_precedence: i32, + associativity: Option, + is_token: bool, + is_string: bool, + is_active: bool, + is_main_token: bool, + is_excluded: bool, + alias: Option, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Symbol { + kind: SymbolType, + index: usize, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Rule { + Blank, + CharacterSet(Vec), + String(String), + Pattern(String), + NamedSymbol(String), + Symbol(Symbol), + Choice { + elements: Vec, + }, + Metadata { + params: MetadataParams, + rule: Rc, + }, + Repeat(Rc), + Seq { + left: Rc, + right: Rc, + } +} + +impl Rule { + pub fn token(content: Rule) -> Self { + add_metadata(content, |params| { + params.is_token = true; + }) + } + + pub fn immediate_token(content: Rule) -> Self { + add_metadata(content, |params| { + params.is_token = true; + params.is_main_token = true; + }) + } + + pub fn prec(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.precedence = Some(value); + }) + } + + pub fn prec_left(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.associativity = Some(Associativity::Left); + params.precedence = Some(value); + }) + } + + pub fn prec_right(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.associativity = Some(Associativity::Right); + params.precedence = Some(value); + }) + } + + pub fn repeat(rule: Rule) -> Self { + Rule::Repeat(Rc::new(rule)) + } + + pub fn choice(rules: Vec) -> Self { + let mut elements = Vec::with_capacity(rules.len()); + for rule in rules { + choice_helper(&mut elements, rule); + } + Rule::Choice { elements } + } + + pub fn seq(rules: Vec) -> Self { + let mut result = Rule::Blank; + for rule in rules { + match rule { + Rule::Blank => continue, + Rule::Metadata { rule, params: _ } => { + if *rule == Rule::Blank { + continue; + } + }, + _ => { + if result == Rule::Blank { + result = rule; + } else { + result = Rule::Seq { + left: Rc::new(result), + 
right: Rc::new(rule), + } + } + } + } + } + result + } + + pub fn terminal(index: usize) -> Self { + Rule::Symbol(Symbol::terminal(index)) + } + + pub fn non_terminal(index: usize) -> Self { + Rule::Symbol(Symbol::non_terminal(index)) + } + + pub fn external(index: usize) -> Self { + Rule::Symbol(Symbol::external(index)) + } + + pub fn named(name: &'static str) -> Self { + Rule::NamedSymbol(name.to_string()) + } + + pub fn string(value: &'static str) -> Self { + Rule::String(value.to_string()) + } +} + +impl Symbol { + pub fn non_terminal(index: usize) -> Self { + Symbol { kind: SymbolType::NonTerminal, index } + } + + pub fn terminal(index: usize) -> Self { + Symbol { kind: SymbolType::Terminal, index } + } + + pub fn external(index: usize) -> Self { + Symbol { kind: SymbolType::External, index } + } +} + +impl From for Rule { + fn from(symbol: Symbol) -> Self { + Rule::Symbol(symbol) + } +} + +fn add_metadata(input: Rule, f: T) -> Rule { + match input { + Rule::Metadata { rule, mut params } => { + f(&mut params); + Rule::Metadata { rule, params } + }, + _ => { + let mut params = MetadataParams::default(); + f(&mut params); + Rule::Metadata { rule: Rc::new(input), params } + } + } +} + +fn choice_helper(result: &mut Vec, rule: Rule) { + match rule { + Rule::Choice {elements} => { + for element in elements { + choice_helper(result, element); + } + }, + _ => { + if !result.contains(&rule) { + result.push(rule); + } + } + } +} diff --git a/src/tables.rs b/src/tables.rs new file mode 100644 index 00000000..10b1e41d --- /dev/null +++ b/src/tables.rs @@ -0,0 +1,77 @@ +use std::collections::HashMap; +use std::ops::Range; +use crate::rules::{Associativity, Symbol, Alias}; + +pub type AliasSequenceId = usize; +pub type ParseStateId = usize; +pub type LexStateId = usize; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseActionType { + Error, + Shift, + Reduce, + Accept, + Recover, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseAction { + 
Accept, + Error, + Shift(ParseStateId), + ShiftExtra, + Recover, + Reduce { + symbol: Symbol, + child_count: usize, + precedence: i32, + dynamic_precedence: i32, + associativity: Option, + alias_sequence_id: Option, + is_repetition: bool, + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParseTableEntry { + actions: Vec, + reusable: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParseState { + terminal_entries: HashMap, + nonterminal_entries: HashMap +} + +#[derive(Debug, PartialEq, Eq)] +pub struct ParseTable { + states: Vec, + alias_sequences: Vec>, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AdvanceAction { + state: LexStateId, + precedence: Range, + in_main_token: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AcceptTokenAction { + symbol: Symbol, + precedence: i32, + implicit_precedence: i32, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LexState { + advance_actions: HashMap, + accept_action: Option, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct LexTable { + states: Vec, +} From 0688a5edd387e01ca7c83f9bbf2fb732852d2f5d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 6 Dec 2018 22:11:52 -0800 Subject: [PATCH 050/208] Implement extract_tokens --- src/build_tables/mod.rs | 2 +- src/grammars.rs | 83 +++-- src/parse_grammar.rs | 12 +- src/prepare_grammar/expand_repeats.rs | 36 +- src/prepare_grammar/extract_tokens.rs | 492 +++++++++++++++++++++++++- src/prepare_grammar/intern_symbols.rs | 38 +- src/prepare_grammar/mod.rs | 8 +- src/render/mod.rs | 2 +- src/rules.rs | 52 +-- src/tables.rs | 24 +- 10 files changed, 621 insertions(+), 128 deletions(-) diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index c5dd5b54..c3518428 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -25,7 +25,7 @@ struct ParseTableBuilder<'a> { parse_table: ParseTable, } -pub fn build_tables( +pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: 
&LexicalGrammar, simple_aliases: &AliasMap diff --git a/src/grammars.rs b/src/grammars.rs index 6f5b772e..62910637 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -1,7 +1,7 @@ use crate::rules::{Associativity, Alias, Rule, Symbol}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum VariableType { +pub(crate) enum VariableType { Hidden, Auxiliary, Anonymous, @@ -11,16 +11,16 @@ pub enum VariableType { // Input grammar #[derive(Clone, Debug, PartialEq, Eq)] -pub struct InputVariable { +pub(crate) struct Variable { pub name: String, pub kind: VariableType, pub rule: Rule, } -#[derive(PartialEq, Eq)] -pub struct InputGrammar { +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct InputGrammar { pub name: String, - pub variables: Vec, + pub variables: Vec, pub extra_tokens: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, @@ -30,60 +30,53 @@ pub struct InputGrammar { // Extracted lexical grammar -#[derive(PartialEq, Eq)] -pub struct LexicalVariable { - name: String, - kind: VariableType, - rule: Rule, - is_string: bool, -} - -pub struct LexicalGrammar { - variables: Vec, - separators: Vec, +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct LexicalGrammar { + pub variables: Vec, + pub separators: Vec, } // Extracted syntax grammar #[derive(Clone, Debug, PartialEq, Eq)] -pub struct ProductionStep { - symbol: Symbol, - precedence: i32, - associativity: Option, - alias: Option, - is_excluded: bool, +pub(crate) struct ProductionStep { + pub symbol: Symbol, + pub precedence: i32, + pub associativity: Option, + pub alias: Option, + pub is_excluded: bool, } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct Production { - steps: Vec, - dynamic_precedence: i32, +pub(crate) struct Production { + pub steps: Vec, + pub dynamic_precedence: i32, } -#[derive(Clone, PartialEq, Eq)] -pub struct SyntaxVariable { - name: String, - kind: VariableType, +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct SyntaxVariable { + pub name: String, + pub kind: 
VariableType, } -#[derive(Clone, PartialEq, Eq)] -pub struct ExternalToken { - name: String, - kind: VariableType, - corresponding_internal_token: Symbol, +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct ExternalToken { + pub name: String, + pub kind: VariableType, + pub corresponding_internal_token: Option, } -pub struct SyntaxGrammar { - variables: Vec, - extra_tokens: Vec, - expected_conflicts: Vec>, - external_tokens: Vec, - variables_to_inline: Vec, - word_token: Symbol, +#[derive(Debug)] +pub(crate) struct SyntaxGrammar { + pub variables: Vec, + pub extra_tokens: Vec, + pub expected_conflicts: Vec>, + pub external_tokens: Vec, + pub variables_to_inline: Vec, + pub word_token: Symbol, } -#[cfg(test)] -impl InputVariable { +impl Variable { pub fn named(name: &str, rule: Rule) -> Self { Self { name: name.to_string(), kind: VariableType::Named, rule } } @@ -95,4 +88,8 @@ impl InputVariable { pub fn hidden(name: &str, rule: Rule) -> Self { Self { name: name.to_string(), kind: VariableType::Hidden, rule } } + + pub fn anonymous(name: &str, rule: Rule) -> Self { + Self { name: name.to_string(), kind: VariableType::Anonymous, rule } + } } diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs index 4c21e5ba..0f1f5008 100644 --- a/src/parse_grammar.rs +++ b/src/parse_grammar.rs @@ -1,13 +1,13 @@ use serde_json::{Map, Value}; use crate::error::Result; -use crate::grammars::{InputGrammar, InputVariable, VariableType}; +use crate::grammars::{InputGrammar, Variable, VariableType}; use crate::rules::Rule; use std::collections::HashMap; #[derive(Deserialize)] #[serde(tag = "type")] #[allow(non_camel_case_types)] -pub enum RuleJSON { +enum RuleJSON { BLANK, STRING { value: String, @@ -58,12 +58,12 @@ struct GrammarJSON { word: Option, } -pub fn parse_grammar(input: &str) -> Result { +pub(crate) fn parse_grammar(input: &str) -> Result { let grammar_json: GrammarJSON = serde_json::from_str(&input)?; let mut variables = Vec::with_capacity(grammar_json.rules.len()); 
for (name, value) in grammar_json.rules { - variables.push(InputVariable { + variables.push(Variable { name: name.to_owned(), kind: VariableType::Named, rule: parse_rule(serde_json::from_value(value)?), @@ -138,12 +138,12 @@ mod tests { assert_eq!(grammar.name, "my_lang"); assert_eq!(grammar.variables, vec![ - InputVariable { + Variable { name: "file".to_string(), kind: VariableType::Named, rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())) }, - InputVariable { + Variable { name: "statement".to_string(), kind: VariableType::Named, rule: Rule::String("foo".to_string()) diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs index 69db150c..dcb8f916 100644 --- a/src/prepare_grammar/expand_repeats.rs +++ b/src/prepare_grammar/expand_repeats.rs @@ -1,5 +1,5 @@ use crate::rules::{Rule, Symbol}; -use crate::grammars::{InputVariable, VariableType}; +use crate::grammars::{Variable, VariableType}; use std::collections::HashMap; use std::mem; use std::rc::Rc; @@ -9,12 +9,12 @@ struct Expander { variable_name: String, repeat_count_in_variable: usize, preceding_symbol_count: usize, - auxiliary_variables: Vec, + auxiliary_variables: Vec, existing_repeats: HashMap } impl Expander { - fn expand_variable(&mut self, variable: &mut InputVariable) { + fn expand_variable(&mut self, variable: &mut Variable) { self.variable_name.clear(); self.variable_name.push_str(&variable.name); self.repeat_count_in_variable = 0; @@ -48,7 +48,7 @@ impl Expander { let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len()); let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol)); self.existing_repeats.insert(inner_rule.clone(), repeat_symbol); - self.auxiliary_variables.push(InputVariable { + self.auxiliary_variables.push(Variable { name: rule_name, kind: VariableType::Auxiliary, rule: Rule::Choice { @@ -100,7 +100,7 @@ mod tests { fn test_basic_repeat_expansion() { // Repeats nested inside of sequences and choices 
are expanded. let grammar = expand_repeats(build_grammar(vec![ - InputVariable::named("rule0", Rule::seq(vec![ + Variable::named("rule0", Rule::seq(vec![ Rule::terminal(10), Rule::choice(vec![ Rule::repeat(Rule::terminal(11)), @@ -111,7 +111,7 @@ mod tests { ])); assert_eq!(grammar.variables, vec![ - InputVariable::named("rule0", Rule::seq(vec![ + Variable::named("rule0", Rule::seq(vec![ Rule::terminal(10), Rule::choice(vec![ Rule::non_terminal(1), @@ -119,14 +119,14 @@ mod tests { ]), Rule::terminal(13), ])), - InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Variable::auxiliary("rule0_repeat1", Rule::choice(vec![ Rule::seq(vec![ Rule::non_terminal(1), Rule::non_terminal(1), ]), Rule::terminal(11), ])), - InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![ + Variable::auxiliary("rule0_repeat2", Rule::choice(vec![ Rule::seq(vec![ Rule::non_terminal(2), Rule::non_terminal(2), @@ -140,11 +140,11 @@ mod tests { fn test_repeat_deduplication() { // Terminal 4 appears inside of a repeat in three different places. let grammar = expand_repeats(build_grammar(vec![ - InputVariable::named("rule0", Rule::choice(vec![ + Variable::named("rule0", Rule::choice(vec![ Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]), Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]), ])), - InputVariable::named("rule1", Rule::seq(vec![ + Variable::named("rule1", Rule::seq(vec![ Rule::terminal(3), Rule::repeat(Rule::terminal(4)), ])), @@ -152,15 +152,15 @@ mod tests { // Only one auxiliary rule is created for repeating terminal 4. 
assert_eq!(grammar.variables, vec![ - InputVariable::named("rule0", Rule::choice(vec![ + Variable::named("rule0", Rule::choice(vec![ Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]), Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]), ])), - InputVariable::named("rule1", Rule::seq(vec![ + Variable::named("rule1", Rule::seq(vec![ Rule::terminal(3), Rule::non_terminal(2), ])), - InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Variable::auxiliary("rule0_repeat1", Rule::choice(vec![ Rule::seq(vec![ Rule::non_terminal(2), Rule::non_terminal(2), @@ -173,7 +173,7 @@ mod tests { #[test] fn test_expansion_of_nested_repeats() { let grammar = expand_repeats(build_grammar(vec![ - InputVariable::named("rule0", Rule::seq(vec![ + Variable::named("rule0", Rule::seq(vec![ Rule::terminal(10), Rule::repeat(Rule::seq(vec![ Rule::terminal(11), @@ -183,18 +183,18 @@ mod tests { ])); assert_eq!(grammar.variables, vec![ - InputVariable::named("rule0", Rule::seq(vec![ + Variable::named("rule0", Rule::seq(vec![ Rule::terminal(10), Rule::non_terminal(2), ])), - InputVariable::auxiliary("rule0_repeat1", Rule::choice(vec![ + Variable::auxiliary("rule0_repeat1", Rule::choice(vec![ Rule::seq(vec![ Rule::non_terminal(1), Rule::non_terminal(1), ]), Rule::terminal(12), ])), - InputVariable::auxiliary("rule0_repeat2", Rule::choice(vec![ + Variable::auxiliary("rule0_repeat2", Rule::choice(vec![ Rule::seq(vec![ Rule::non_terminal(2), Rule::non_terminal(2), @@ -207,7 +207,7 @@ mod tests { ]); } - fn build_grammar(variables: Vec) -> ExtractedGrammar { + fn build_grammar(variables: Vec) -> ExtractedGrammar { ExtractedGrammar { variables, extra_tokens: Vec::new(), diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs index 660d3819..ee90b3c8 100644 --- a/src/prepare_grammar/extract_tokens.rs +++ b/src/prepare_grammar/extract_tokens.rs @@ -1,7 +1,491 @@ -use crate::error::Result; -use crate::grammars::LexicalGrammar; +use 
std::collections::HashMap; +use std::rc::Rc; +use std::mem; +use crate::error::{Error, Result}; +use crate::rules::{Rule, MetadataParams, Symbol, SymbolType}; +use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken}; use super::{InternedGrammar, ExtractedGrammar}; -pub(super) fn extract_tokens(grammar: InternedGrammar) -> Result<(ExtractedGrammar, LexicalGrammar)> { - unimplemented!(); +pub(super) fn extract_tokens( + mut grammar: InternedGrammar +) -> Result<(ExtractedGrammar, LexicalGrammar)> { + let mut extractor = TokenExtractor { + current_variable_name: String::new(), + current_variable_token_count: 0, + extracted_variables: Vec::new(), + extracted_usage_counts: Vec::new(), + }; + + for mut variable in grammar.variables.iter_mut() { + extractor.extract_tokens_in_variable(&mut variable); + } + + for mut variable in grammar.external_tokens.iter_mut() { + extractor.extract_tokens_in_variable(&mut variable); + } + + let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len()); + for variable in extractor.extracted_variables { + lexical_variables.push(Variable { + name: variable.name, + kind: variable.kind, + rule: variable.rule, + }); + } + + // If a variable's entire rule was extracted as a token and that token didn't + // appear within any other rule, then remove that variable from the syntax + // grammar, giving its name to the token in the lexical grammar. Any symbols + // that pointed to that variable will need to be updated to point to the + // variable in the lexical grammar. Symbols that pointed to later variables + // will need to have their indices decremented. 
+ let mut variables = Vec::new(); + let mut symbol_replacer = SymbolReplacer { replacements: HashMap::new() }; + for (i, variable) in grammar.variables.into_iter().enumerate() { + if let Rule::Symbol(Symbol { kind: SymbolType::Terminal, index }) = variable.rule { + if i > 0 && extractor.extracted_usage_counts[index] == 1 { + let mut lexical_variable = &mut lexical_variables[index]; + lexical_variable.kind = variable.kind; + lexical_variable.name = variable.name; + symbol_replacer.replacements.insert(i, index); + continue; + } + } + variables.push(variable); + } + + for variable in variables.iter_mut() { + variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule); + } + + let expected_conflicts = grammar.expected_conflicts + .into_iter() + .map(|conflict| + conflict + .iter() + .map(|symbol| symbol_replacer.replace_symbol(*symbol)) + .collect() + ).collect(); + + let variables_to_inline = grammar.variables_to_inline + .into_iter() + .map(|symbol| symbol_replacer.replace_symbol(symbol)) + .collect(); + + let mut separators = Vec::new(); + let mut extra_tokens = Vec::new(); + for rule in grammar.extra_tokens { + if let Rule::Symbol(symbol) = rule { + let new_symbol = symbol_replacer.replace_symbol(symbol); + if new_symbol.is_non_terminal() { + return Err(Error::GrammarError(format!( + "Non-token symbol '{}' cannot be used as an extra token", + &variables[new_symbol.index].name + ))); + } else { + extra_tokens.push(new_symbol); + } + } else { + if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { + extra_tokens.push(Symbol::terminal(index)); + } else { + separators.push(rule); + } + } + } + + let mut external_tokens = Vec::new(); + for external_token in grammar.external_tokens { + let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule); + if let Rule::Symbol(symbol) = rule { + if symbol.is_non_terminal() { + return Err(Error::GrammarError(format!( + "Rule '{}' cannot be used as both an external token and a 
non-terminal rule", + &variables[symbol.index].name, + ))); + } + + if symbol.is_external() { + external_tokens.push(ExternalToken { + name: external_token.name, + kind: external_token.kind, + corresponding_internal_token: None, + }) + } else { + external_tokens.push(ExternalToken { + name: lexical_variables[symbol.index].name.clone(), + kind: external_token.kind, + corresponding_internal_token: Some(symbol), + }) + } + } else { + return Err(Error::GrammarError(format!( + "Non-symbol rules cannot be used as external tokens" + ))); + } + } + + let mut word_token = None; + if let Some(token) = grammar.word_token { + let token = symbol_replacer.replace_symbol(token); + if token.is_non_terminal() { + return Err(Error::GrammarError(format!( + "Non-terminal symbol '{}' cannot be used as the word token", + &variables[token.index].name + ))); + } + word_token = Some(token); + } + + Ok(( + ExtractedGrammar { + variables, + expected_conflicts, + extra_tokens, + variables_to_inline, + external_tokens, + word_token, + }, + LexicalGrammar { + variables: lexical_variables, + separators, + } + )) +} + +struct TokenExtractor { + current_variable_name: String, + current_variable_token_count: usize, + extracted_variables: Vec, + extracted_usage_counts: Vec, +} + +struct SymbolReplacer { + replacements: HashMap +} + +impl TokenExtractor { + fn extract_tokens_in_variable(&mut self, variable: &mut Variable) { + self.current_variable_name.clear(); + self.current_variable_name.push_str(&variable.name); + self.current_variable_token_count = 0; + let mut rule = Rule::Blank; + mem::swap(&mut rule, &mut variable.rule); + variable.rule = self.extract_tokens_in_rule(&rule); + } + + fn extract_tokens_in_rule(&mut self, input: &Rule) -> Rule { + match input { + Rule::String(name) => self.extract_token(input, Some(name)).into(), + Rule::Pattern(..) 
=> self.extract_token(input, None).into(), + Rule::Metadata { params, rule } => { + if params.is_token { + let mut params = params.clone(); + params.is_token = false; + + let mut string_value = None; + if let Rule::String(value) = rule.as_ref() { + string_value = Some(value); + } + + let rule_to_extract = if params == MetadataParams::default() { + rule.as_ref() + } else { + input + }; + + self.extract_token(rule_to_extract, string_value).into() + } else { + Rule::Metadata { + params: params.clone(), + rule: Rc::new(self.extract_tokens_in_rule((&rule).clone())) + } + } + }, + Rule::Repeat(content) => Rule::Repeat( + Rc::new(self.extract_tokens_in_rule(content)) + ), + Rule::Seq { left, right } => Rule::Seq { + left: Rc::new(self.extract_tokens_in_rule(left)), + right: Rc::new(self.extract_tokens_in_rule(right)), + }, + Rule::Choice { elements } => Rule::Choice { + elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect() + }, + _ => input.clone() + } + } + + fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Symbol { + for (i, variable) in self.extracted_variables.iter_mut().enumerate() { + if variable.rule == *rule { + self.extracted_usage_counts[i] += 1; + return Symbol::terminal(i) + } + } + + let index = self.extracted_variables.len(); + let variable = if let Some(string_value) = string_value { + Variable::anonymous(string_value, rule.clone()) + } else { + self.current_variable_token_count += 1; + Variable::auxiliary( + &format!( + "{}_token{}", + &self.current_variable_name, + self.current_variable_token_count + ), + rule.clone() + ) + }; + + self.extracted_variables.push(variable); + self.extracted_usage_counts.push(1); + Symbol::terminal(index) + } +} + +impl SymbolReplacer { + fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule { + match rule { + Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(), + Rule::Choice { elements } => Rule::Choice { + elements: elements.iter().map(|e| 
self.replace_symbols_in_rule(e)).collect() + }, + Rule::Seq { left, right } => Rule::Seq { + left: Rc::new(self.replace_symbols_in_rule(left)), + right: Rc::new(self.replace_symbols_in_rule(right)), + }, + Rule::Repeat(content) => Rule::Repeat( + Rc::new(self.replace_symbols_in_rule(content)) + ), + Rule::Metadata { rule, params } => Rule::Metadata { + params: params.clone(), + rule: Rc::new(self.replace_symbols_in_rule(rule)), + }, + _ => rule.clone() + } + } + + fn replace_symbol(&self, symbol: Symbol) -> Symbol { + if !symbol.is_non_terminal() { + return symbol + } + + if let Some(replacement) = self.replacements.get(&symbol.index) { + return Symbol::terminal(*replacement); + } + + let mut adjusted_index = symbol.index; + for (replaced_index, _) in self.replacements.iter() { + if *replaced_index < symbol.index { + adjusted_index -= 1; + } + } + + return Symbol::non_terminal(adjusted_index); + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_extraction() { + let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![ + Variable::named("rule_0", Rule::repeat(Rule::seq(vec![ + Rule::string("a"), + Rule::pattern("b"), + Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + Rule::token(Rule::repeat(Rule::choice(vec![ + Rule::string("c"), + Rule::string("d"), + ]))) + ]) + ]))), + Variable::named("rule_1", Rule::pattern("e")), + Variable::named("rule_2", Rule::pattern("b")), + Variable::named("rule_3", Rule::seq(vec![ + Rule::non_terminal(2), + Rule::Blank, + ])), + ])).unwrap(); + + assert_eq!(syntax_grammar.variables, vec![ + Variable::named("rule_0", Rule::repeat(Rule::seq(vec![ + // The string "a" was replaced by a symbol referencing the lexical grammar + Rule::terminal(0), + + // The pattern "b" was replaced by a symbol referencing the lexical grammar + Rule::terminal(1), + Rule::choice(vec![ + // The symbol referencing `rule_1` was replaced by a symbol referencing + // the lexical grammar. 
+ Rule::terminal(3), + + // The symbol referencing `rule_2` had its index decremented because + // `rule_1` was moved to the lexical grammar. + Rule::non_terminal(1), + + // The rule wrapped in `token` was replaced by a symbol referencing + // the lexical grammar. + Rule::terminal(2), + ]) + ]))), + + // The pattern "e" was only used in once place: as the definition of `rule_1`, + // so that rule was moved to the lexical grammar. The pattern "b" appeared in + // two places, so it was not moved into the lexical grammar. + Variable::named("rule_2", Rule::terminal(1)), + Variable::named("rule_3", Rule::seq(vec![ + Rule::non_terminal(1), + Rule::Blank, + ])), + ]); + + assert_eq!(lexical_grammar.variables, vec![ + Variable::anonymous("a", Rule::string("a")), + Variable::auxiliary("rule_0_token1", Rule::pattern("b")), + Variable::auxiliary("rule_0_token2", Rule::repeat(Rule::choice(vec![ + Rule::string("c"), + Rule::string("d"), + ]))), + Variable::named("rule_1", Rule::pattern("e")), + ]); + } + + #[test] + fn test_start_rule_is_token() { + let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![ + Variable::named("rule_0", Rule::string("hello")), + ])).unwrap(); + + assert_eq!(syntax_grammar.variables, vec![ + Variable::named("rule_0", Rule::terminal(0)), + ]); + assert_eq!(lexical_grammar.variables, vec![ + Variable::anonymous("hello", Rule::string("hello")), + ]) + } + + #[test] + fn test_extracting_extra_tokens() { + let mut grammar = build_grammar(vec![ + Variable::named("rule_0", Rule::string("x")), + Variable::named("comment", Rule::pattern("//.*")), + ]); + grammar.extra_tokens = vec![ + Rule::string(" "), + Rule::non_terminal(1), + ]; + + let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap(); + assert_eq!(syntax_grammar.extra_tokens, vec![ + Symbol::terminal(1), + ]); + assert_eq!(lexical_grammar.separators, vec![ + Rule::string(" "), + ]); + } + + #[test] + fn test_extract_externals() { + let mut grammar = 
build_grammar(vec![ + Variable::named("rule_0", Rule::seq(vec![ + Rule::external(0), + Rule::string("a"), + Rule::non_terminal(1), + Rule::non_terminal(2), + ])), + Variable::named("rule_1", Rule::string("b")), + Variable::named("rule_2", Rule::string("c")), + ]); + grammar.external_tokens = vec![ + Variable::named("external_0", Rule::external(0)), + Variable::anonymous("a", Rule::string("a")), + Variable::named("rule_2", Rule::non_terminal(2)), + ]; + + let (syntax_grammar, _) = extract_tokens(grammar).unwrap(); + + assert_eq!(syntax_grammar.external_tokens, vec![ + ExternalToken { + name: "external_0".to_string(), + kind: VariableType::Named, + corresponding_internal_token: None, + }, + ExternalToken { + name: "a".to_string(), + kind: VariableType::Anonymous, + corresponding_internal_token: Some(Symbol::terminal(0)), + }, + ExternalToken { + name: "rule_2".to_string(), + kind: VariableType::Named, + corresponding_internal_token: Some(Symbol::terminal(2)), + }, + ]); + } + + #[test] + fn test_error_on_non_terminal_symbol_extras() { + let mut grammar = build_grammar(vec![ + Variable::named("rule_0", Rule::non_terminal(1)), + Variable::named("rule_1", Rule::non_terminal(2)), + Variable::named("rule_2", Rule::string("x")), + ]); + grammar.extra_tokens = vec![ + Rule::non_terminal(1), + ]; + + match extract_tokens(grammar) { + Err(Error::GrammarError(s)) => { + assert_eq!(s, "Non-token symbol 'rule_1' cannot be used as an extra token"); + }, + _ => { + panic!("Expected an error but got no error"); + } + } + } + + #[test] + fn test_error_on_external_with_same_name_as_non_terminal() { + let mut grammar = build_grammar(vec![ + Variable::named("rule_0", Rule::seq(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + ])), + Variable::named("rule_1", Rule::seq(vec![ + Rule::non_terminal(2), + Rule::non_terminal(2), + ])), + Variable::named("rule_2", Rule::string("a")), + ]); + grammar.external_tokens = vec![ + Variable::named("rule_1", Rule::non_terminal(1)), + ]; + + 
match extract_tokens(grammar) { + Err(Error::GrammarError(s)) => { + assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule"); + }, + _ => { + panic!("Expected an error but got no error"); + } + } + } + + fn build_grammar(variables: Vec) -> InternedGrammar { + InternedGrammar { + variables, + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + } + } } diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs index 00a5c330..e4cf7ff1 100644 --- a/src/prepare_grammar/intern_symbols.rs +++ b/src/prepare_grammar/intern_symbols.rs @@ -1,6 +1,6 @@ use crate::error::{Error, Result}; use crate::rules::{Rule, Symbol}; -use crate::grammars::{InputGrammar, InputVariable, VariableType}; +use crate::grammars::{InputGrammar, Variable, VariableType}; use std::rc::Rc; use super::InternedGrammar; @@ -13,7 +13,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result let mut variables = Vec::with_capacity(grammar.variables.len()); for variable in grammar.variables.iter() { - variables.push(InputVariable { + variables.push(Variable { name: variable.name.clone(), kind: variable_type_for_name(&variable.name), rule: interner.intern_rule(&variable.rule)?, @@ -28,7 +28,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result } else { (String::new(), VariableType::Anonymous) }; - external_tokens.push(InputVariable { name, kind, rule }); + external_tokens.push(Variable { name, kind, rule }); } let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len()); @@ -154,21 +154,21 @@ mod tests { #[test] fn test_basic_repeat_expansion() { let grammar = intern_symbols(&build_grammar(vec![ - InputVariable::named("x", Rule::choice(vec![ + Variable::named("x", Rule::choice(vec![ Rule::named("y"), Rule::named("_z"), ])), - InputVariable::named("y", Rule::named("_z")), - InputVariable::named("_z", 
Rule::string("a")), + Variable::named("y", Rule::named("_z")), + Variable::named("_z", Rule::string("a")), ])).unwrap(); assert_eq!(grammar.variables, vec![ - InputVariable::named("x", Rule::choice(vec![ + Variable::named("x", Rule::choice(vec![ Rule::non_terminal(1), Rule::non_terminal(2), ])), - InputVariable::named("y", Rule::non_terminal(2)), - InputVariable::hidden("_z", Rule::string("a")), + Variable::named("y", Rule::non_terminal(2)), + Variable::hidden("_z", Rule::string("a")), ]); } @@ -177,13 +177,13 @@ mod tests { // Variable `y` is both an internal and an external token. // Variable `z` is just an external token. let mut input_grammar = build_grammar(vec![ - InputVariable::named("w", Rule::choice(vec![ + Variable::named("w", Rule::choice(vec![ Rule::named("x"), Rule::named("y"), Rule::named("z"), ])), - InputVariable::named("x", Rule::string("a")), - InputVariable::named("y", Rule::string("b")), + Variable::named("x", Rule::string("a")), + Variable::named("y", Rule::string("b")), ]); input_grammar.external_tokens.extend(vec![ Rule::named("y"), @@ -195,26 +195,26 @@ mod tests { // Variable `y` is referred to by its internal index. // Variable `z` is referred to by its external index. assert_eq!(grammar.variables, vec![ - InputVariable::named("w", Rule::choice(vec![ + Variable::named("w", Rule::choice(vec![ Rule::non_terminal(1), Rule::non_terminal(2), Rule::external(1), ])), - InputVariable::named("x", Rule::string("a")), - InputVariable::named("y", Rule::string("b")), + Variable::named("x", Rule::string("a")), + Variable::named("y", Rule::string("b")), ]); // The external token for `y` refers back to its internal index. 
assert_eq!(grammar.external_tokens, vec![ - InputVariable::named("y", Rule::non_terminal(2)), - InputVariable::named("z", Rule::external(1)), + Variable::named("y", Rule::non_terminal(2)), + Variable::named("z", Rule::external(1)), ]); } #[test] fn test_grammar_with_undefined_symbols() { let result = intern_symbols(&build_grammar(vec![ - InputVariable::named("x", Rule::named("y")), + Variable::named("x", Rule::named("y")), ])); match result { @@ -223,7 +223,7 @@ mod tests { } } - fn build_grammar(variables: Vec) -> InputGrammar { + fn build_grammar(variables: Vec) -> InputGrammar { InputGrammar { variables, name: "the_language".to_string(), diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index 0788edca..b860807a 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -6,7 +6,7 @@ mod normalize_rules; mod extract_simple_aliases; use crate::rules::{AliasMap, Rule, Symbol}; -use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, InputVariable, ExternalToken}; +use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, Variable, ExternalToken}; use crate::error::Result; use self::intern_symbols::intern_symbols; use self::extract_tokens::extract_tokens; @@ -16,7 +16,7 @@ use self::normalize_rules::normalize_rules; use self::extract_simple_aliases::extract_simple_aliases; pub(self) struct IntermediateGrammar { - variables: Vec, + variables: Vec, extra_tokens: Vec, expected_conflicts: Vec>, external_tokens: Vec, @@ -24,10 +24,10 @@ pub(self) struct IntermediateGrammar { word_token: Option, } -pub(self) type InternedGrammar = IntermediateGrammar; +pub(self) type InternedGrammar = IntermediateGrammar; pub(self) type ExtractedGrammar = IntermediateGrammar; -pub fn prepare_grammar( +pub(crate) fn prepare_grammar( input_grammar: &InputGrammar ) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> { let interned_grammar = intern_symbols(input_grammar)?; diff --git a/src/render/mod.rs b/src/render/mod.rs index 
85ce1f32..5bd11a34 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -2,7 +2,7 @@ use crate::rules::{Symbol, AliasMap}; use crate::grammars::{SyntaxGrammar, LexicalGrammar}; use crate::tables::{ParseTable, LexTable}; -pub fn render_c_code( +pub(crate) fn render_c_code( name: &str, parse_table: ParseTable, main_lex_table: LexTable, diff --git a/src/rules.rs b/src/rules.rs index 3cccca0d..5c3b65fd 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -2,47 +2,47 @@ use std::rc::Rc; use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum SymbolType { +pub(crate) enum SymbolType { External, Terminal, NonTerminal, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum Associativity { +pub(crate) enum Associativity { Left, Right } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct Alias { - value: String, - is_named: bool, +pub(crate) struct Alias { + pub value: String, + pub is_named: bool, } -pub type AliasMap = HashMap; +pub(crate) type AliasMap = HashMap; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] -pub struct MetadataParams { - precedence: Option, - dynamic_precedence: i32, - associativity: Option, - is_token: bool, - is_string: bool, - is_active: bool, - is_main_token: bool, - is_excluded: bool, - alias: Option, +pub(crate) struct MetadataParams { + pub precedence: Option, + pub dynamic_precedence: i32, + pub associativity: Option, + pub is_token: bool, + pub is_string: bool, + pub is_active: bool, + pub is_main_token: bool, + pub is_excluded: bool, + pub alias: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct Symbol { - kind: SymbolType, - index: usize, +pub(crate) struct Symbol { + pub kind: SymbolType, + pub index: usize, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum Rule { +pub(crate) enum Rule { Blank, CharacterSet(Vec), String(String), @@ -153,9 +153,21 @@ impl Rule { pub fn string(value: &'static str) -> Self { Rule::String(value.to_string()) } + + pub fn 
pattern(value: &'static str) -> Self { + Rule::Pattern(value.to_string()) + } } impl Symbol { + pub fn is_non_terminal(&self) -> bool { + return self.kind == SymbolType::NonTerminal + } + + pub fn is_external(&self) -> bool { + return self.kind == SymbolType::External + } + pub fn non_terminal(index: usize) -> Self { Symbol { kind: SymbolType::NonTerminal, index } } diff --git a/src/tables.rs b/src/tables.rs index 10b1e41d..de66253c 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -2,12 +2,12 @@ use std::collections::HashMap; use std::ops::Range; use crate::rules::{Associativity, Symbol, Alias}; -pub type AliasSequenceId = usize; -pub type ParseStateId = usize; -pub type LexStateId = usize; +pub(crate) type AliasSequenceId = usize; +pub(crate) type ParseStateId = usize; +pub(crate) type LexStateId = usize; #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ParseActionType { +pub(crate) enum ParseActionType { Error, Shift, Reduce, @@ -16,7 +16,7 @@ pub enum ParseActionType { } #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ParseAction { +pub(crate) enum ParseAction { Accept, Error, Shift(ParseStateId), @@ -34,44 +34,44 @@ pub enum ParseAction { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct ParseTableEntry { +pub(crate) struct ParseTableEntry { actions: Vec, reusable: bool, } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct ParseState { +pub(crate) struct ParseState { terminal_entries: HashMap, nonterminal_entries: HashMap } #[derive(Debug, PartialEq, Eq)] -pub struct ParseTable { +pub(crate) struct ParseTable { states: Vec, alias_sequences: Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct AdvanceAction { +pub(crate) struct AdvanceAction { state: LexStateId, precedence: Range, in_main_token: bool, } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct AcceptTokenAction { +pub(crate) struct AcceptTokenAction { symbol: Symbol, precedence: i32, implicit_precedence: i32, } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct LexState { 
+pub(crate) struct LexState { advance_actions: HashMap, accept_action: Option, } #[derive(Debug, PartialEq, Eq)] -pub struct LexTable { +pub(crate) struct LexTable { states: Vec, } From ead6ca1738c52e8da4a2eb577d1c4c50b08593b4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 8 Dec 2018 13:44:11 -0800 Subject: [PATCH 051/208] Generate NFAs from regexes --- Cargo.lock | 1 + Cargo.toml | 1 + src/error.rs | 11 ++ src/main.rs | 1 + src/nfa.rs | 160 ++++++++++++++++++ src/prepare_grammar/normalize_rules.rs | 224 +++++++++++++++++++++++++ src/rules.rs | 2 +- 7 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 src/nfa.rs diff --git a/Cargo.lock b/Cargo.lock index 20908681..d5109fb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -466,6 +466,7 @@ dependencies = [ "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 965cc81e..93a49d2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ serde = "1.0" serde_derive = "1.0" serde_json = "1.0" tree-sitter = "0.3.1" +regex-syntax = "0.6.4" diff --git a/src/error.rs b/src/error.rs index 90e7b8f9..49064c22 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,10 +2,21 @@ pub enum Error { GrammarError(String), SymbolError(String), + RegexError(String), } pub type Result = std::result::Result; +impl Error { + pub fn grammar(message: &str) -> Self { + Error::GrammarError(message.to_string()) + } + + pub fn regex(message: &str) -> Self { + Error::RegexError(message.to_string()) + } +} + impl From for Error { 
fn from(error: serde_json::Error) -> Self { Error::GrammarError(error.to_string()) diff --git a/src/main.rs b/src/main.rs index 3eeb306a..4d376929 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ mod build_tables; mod error; mod generate; mod grammars; +mod nfa; mod parse_grammar; mod prepare_grammar; mod render; diff --git a/src/nfa.rs b/src/nfa.rs new file mode 100644 index 00000000..55aa11dc --- /dev/null +++ b/src/nfa.rs @@ -0,0 +1,160 @@ +use std::fmt; +use std::char; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum CharacterSet { + Include(Vec), + Exclude(Vec), +} + +#[derive(Debug)] +pub enum NfaState { + Advance(CharacterSet, u32), + Split(u32, u32), + Accept, +} + +pub struct Nfa { + pub states: Vec +} + +#[derive(Debug)] +pub struct NfaCursor<'a> { + indices: Vec, + nfa: &'a Nfa, +} + +impl CharacterSet { + pub fn empty() -> Self { + CharacterSet::Include(Vec::new()) + } + + pub fn all() -> Self { + CharacterSet::Exclude(Vec::new()) + } + + pub fn negate(self) -> CharacterSet { + match self { + CharacterSet::Include(chars) => CharacterSet::Exclude(chars), + CharacterSet::Exclude(chars) => CharacterSet::Include(chars), + } + } + + pub fn add_char(self, c: char) -> Self { + if let CharacterSet::Include(mut chars) = self { + if let Err(i) = chars.binary_search(&c) { + chars.insert(i, c); + } + CharacterSet::Include(chars) + } else { + panic!("Called add with a negated character set"); + } + } + + pub fn add_range(self, start: char, end: char) -> Self { + if let CharacterSet::Include(mut chars) = self { + let mut c = start as u32; + while c <= end as u32 { + chars.push(char::from_u32(c).unwrap()); + c += 1; + } + chars.sort_unstable(); + chars.dedup(); + CharacterSet::Include(chars) + } else { + panic!("Called add with a negated character set"); + } + } + + pub fn add(self, other: CharacterSet) -> Self { + if let (CharacterSet::Include(mut chars), CharacterSet::Include(other_chars)) = (self, other) { + chars.extend(other_chars); + 
chars.sort_unstable(); + chars.dedup(); + CharacterSet::Include(chars) + } else { + panic!("Called add with a negated character set"); + } + } + + pub fn contains(&self, c: char) -> bool { + match self { + CharacterSet::Include(chars) => chars.contains(&c), + CharacterSet::Exclude(chars) => !chars.contains(&c), + } + } +} + +impl Nfa { + pub fn new() -> Self { + Nfa { states: vec![NfaState::Accept] } + } + + pub fn start_index(&self) -> u32 { + self.states.len() as u32 - 1 + } + + pub fn prepend(&mut self, f: impl Fn(u32) -> NfaState) { + self.states.push(f(self.start_index())); + } +} + +impl fmt::Debug for Nfa { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Nfa {{ states: {{")?; + for (i, state) in self.states.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}: {:?}", i, state)?; + } + write!(f, "}} }}")?; + Ok(()) + } +} + +impl<'a> NfaCursor<'a> { + pub fn new(nfa: &'a Nfa) -> Self { + let mut result = Self { nfa, indices: Vec::new() }; + result.add_indices(&mut vec![nfa.start_index()]); + result + } + + pub fn advance(&mut self, c: char) -> bool { + let mut result = false; + let mut new_indices = Vec::new(); + for index in &self.indices { + if let NfaState::Advance(chars, next_index) = &self.nfa.states[*index as usize] { + if chars.contains(c) { + new_indices.push(*next_index); + result = true; + } + } + } + self.indices.clear(); + self.add_indices(&mut new_indices); + result + } + + pub fn is_done(&self) -> bool { + self.indices.iter().any(|index| { + if let NfaState::Accept = self.nfa.states[*index as usize] { + true + } else { + false + } + }) + } + + pub fn add_indices(&mut self, new_indices: &mut Vec) { + while let Some(index) = new_indices.pop() { + let state = &self.nfa.states[index as usize]; + if let NfaState::Split(left, right) = state { + new_indices.push(*left); + new_indices.push(*right); + } else if let Err(i) = self.indices.binary_search(&index) { + self.indices.insert(i, index); + } + } + } +} diff 
--git a/src/prepare_grammar/normalize_rules.rs b/src/prepare_grammar/normalize_rules.rs index 9e625ef5..67177b4f 100644 --- a/src/prepare_grammar/normalize_rules.rs +++ b/src/prepare_grammar/normalize_rules.rs @@ -1,5 +1,229 @@ +use crate::error::{Error, Result}; +use crate::rules::Rule; use crate::grammars::LexicalGrammar; +use crate::nfa::{Nfa, NfaState, NfaCursor, CharacterSet}; +use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind}; + +fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet { + match item { + ClassPerlKind::Digit => CharacterSet::empty() + .add_range('0', '9'), + ClassPerlKind::Space => CharacterSet::empty() + .add_char(' ') + .add_char('\t') + .add_char('\r') + .add_char('\n'), + ClassPerlKind::Word => CharacterSet::empty() + .add_char('_') + .add_range('A', 'Z') + .add_range('a', 'z') + .add_range('0', '9') + } +} + +fn evaluate_character_class(item: &ClassSetItem) -> Result { + match item { + ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), + ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), + ClassSetItem::Range(range) => Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)), + ClassSetItem::Union(union) => { + let mut result = CharacterSet::empty(); + for item in &union.items { + result = result.add(evaluate_character_class(&item)?); + } + Ok(result) + } + _ => Err(Error::regex("Unsupported character class syntax")), + } +} + +fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { + match ast { + Ast::Empty(_) => Ok(()), + Ast::Flags(_) => Err(Error::regex("Flags are not supported")), + Ast::Literal(literal) => { + nfa.states.push(NfaState::Advance(CharacterSet::Include(vec![literal.c]), next_state_index)); + Ok(()) + }, + Ast::Dot(_) => { + nfa.states.push(NfaState::Advance(CharacterSet::Exclude(vec!['\n']), next_state_index)); + Ok(()) + }, + Ast::Assertion(_) => Err(Error::regex("Assertions are not 
supported")), + Ast::Class(class) => match class { + Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")), + Class::Perl(class) => { + nfa.states.push(NfaState::Advance(evaluate_perl_class(&class.kind), next_state_index)); + Ok(()) + }, + Class::Bracketed(class) => match &class.kind { + ClassSet::Item(item) => { + let character_set = evaluate_character_class(&item)?; + nfa.states.push(NfaState::Advance(character_set, next_state_index)); + Ok(()) + }, + ClassSet::BinaryOp(_) => { + Err(Error::regex("Binary operators in character classes aren't supported")) + } + } + }, + Ast::Repetition(repetition) => match repetition.op.kind { + RepetitionKind::ZeroOrOne => { + regex_to_nfa(&repetition.ast, nfa, next_state_index)?; + nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index)); + Ok(()) + }, + RepetitionKind::OneOrMore => { + nfa.states.push(NfaState::Accept); // Placeholder for split + let split_index = nfa.start_index(); + regex_to_nfa(&repetition.ast, nfa, split_index)?; + nfa.states[split_index as usize] = NfaState::Split( + nfa.start_index(), + next_state_index + ); + Ok(()) + }, + RepetitionKind::ZeroOrMore => { + nfa.states.push(NfaState::Accept); // Placeholder for split + let split_index = nfa.start_index(); + regex_to_nfa(&repetition.ast, nfa, split_index)?; + nfa.states[split_index as usize] = NfaState::Split( + nfa.start_index(), + next_state_index + ); + nfa.prepend(|start_index| NfaState::Split(start_index, next_state_index)); + Ok(()) + }, + RepetitionKind::Range(_) => unimplemented!(), + }, + Ast::Group(group) => regex_to_nfa(&group.ast, nfa, nfa.start_index()), + Ast::Alternation(alternation) => { + let mut alternative_start_indices = Vec::new(); + for ast in alternation.asts.iter() { + regex_to_nfa(&ast, nfa, next_state_index)?; + alternative_start_indices.push(nfa.start_index()); + } + alternative_start_indices.pop(); + for alternative_start_index in alternative_start_indices { + 
nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index)); + } + Ok(()) + }, + Ast::Concat(concat) => { + for ast in concat.asts.iter().rev() { + regex_to_nfa(&ast, nfa, next_state_index)?; + next_state_index = nfa.start_index(); + } + Ok(()) + } + } +} + +fn expand_rule(rule: Rule) -> Result { + match rule { + Rule::Pattern(s) => { + let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?; + let mut nfa = Nfa::new(); + regex_to_nfa(&ast, &mut nfa, 0)?; + Ok(nfa) + }, + Rule::String(s) => { + let mut nfa = Nfa::new(); + for c in s.chars().rev() { + nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index)); + } + Ok(nfa) + }, + _ => Err(Error::grammar("Unexpected rule type")), + } +} pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar { unimplemented!(); } + +#[cfg(test)] +mod tests { + use super::*; + + fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> { + let mut result = None; + let mut char_count = 0; + let mut cursor = NfaCursor::new(nfa); + for c in s.chars() { + if cursor.is_done() { + result = Some(&s[0..char_count]); + } + if cursor.advance(c) { + char_count += 1; + } else { + break; + } + } + result + } + + #[test] + fn test_regex_expansion() { + struct Row { + pattern: &'static str, + examples: Vec<(&'static str, Option<&'static str>)>, + } + + let table = [ + Row { + pattern: "a|bc", + examples: vec![ + ("a12", Some("a")), + ("bc12", Some("bc")), + ("b12", None), + ("c12", None), + ], + }, + Row { + pattern: "(a|b|c)d(e|f|g)h?", + examples: vec![ + ("ade1", Some("ade")), + ("bdf1", Some("bdf")), + ("bdfh1", Some("bdfh")), + ("ad1", None), + ], + }, + Row { + pattern: "a*", + examples: vec![ + ("aaa1", Some("aaa")), + ("b", Some("")), + ], + }, + Row { + pattern: "a((bc)+|(de)*)f", + examples: vec![ + ("af1", Some("af")), + ("adedef1", Some("adedef")), + ("abcbcbcf1", Some("abcbcbcf")), + ("a", None), + ], + }, + Row { + pattern: 
"[a-fA-F0-9]+", + examples: vec![ + ("A1ff0", Some("A1ff")), + ], + }, + Row { + pattern: "\\w\\d\\s", + examples: vec![ + ("_0 ", Some("_0 ")), + ], + }, + ]; + + for Row { pattern, examples } in table.iter() { + let nfa = expand_rule(Rule::pattern(pattern)).unwrap(); + for (haystack, needle) in examples.iter() { + assert_eq!(simulate_nfa(&nfa, haystack), *needle); + } + } + } +} diff --git a/src/rules.rs b/src/rules.rs index 5c3b65fd..b593496a 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -1,4 +1,5 @@ use std::rc::Rc; +use std::char; use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -44,7 +45,6 @@ pub(crate) struct Symbol { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub(crate) enum Rule { Blank, - CharacterSet(Vec), String(String), Pattern(String), NamedSymbol(String), From d482894c7d40b9b563262fef49e2ec81f96d346a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 8 Dec 2018 23:35:48 -0800 Subject: [PATCH 052/208] Implement expand_tokens --- src/grammars.rs | 12 +- src/main.rs | 11 +- src/nfa.rs | 3 +- src/prepare_grammar/expand_repeats.rs | 40 +++--- .../{normalize_rules.rs => expand_tokens.rs} | 130 +++++++++++++----- src/prepare_grammar/extract_simple_aliases.rs | 1 - src/prepare_grammar/extract_tokens.rs | 45 +++--- src/prepare_grammar/flatten_grammar.rs | 4 +- src/prepare_grammar/intern_symbols.rs | 26 ++-- src/prepare_grammar/mod.rs | 15 +- src/rules.rs | 44 ++---- 11 files changed, 192 insertions(+), 139 deletions(-) rename src/prepare_grammar/{normalize_rules.rs => expand_tokens.rs} (61%) diff --git a/src/grammars.rs b/src/grammars.rs index 62910637..c5e9aaa1 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -1,4 +1,5 @@ use crate::rules::{Associativity, Alias, Rule, Symbol}; +use crate::nfa::Nfa; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum VariableType { @@ -30,10 +31,17 @@ pub(crate) struct InputGrammar { // Extracted lexical grammar +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct 
LexicalVariable { + pub name: String, + pub kind: VariableType, + pub nfa: Nfa, +} + #[derive(Debug, PartialEq, Eq)] pub(crate) struct LexicalGrammar { - pub variables: Vec, - pub separators: Vec, + pub variables: Vec, + pub separators: Vec, } // Extracted syntax grammar diff --git a/src/main.rs b/src/main.rs index 4d376929..b83764fc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,7 @@ mod render; mod rules; mod tables; -fn main() { +fn main() -> error::Result<()> { let matches = App::new("tree-sitter") .version("0.1") .author("Max Brunsfeld ") @@ -32,5 +32,12 @@ fn main() { .arg(Arg::with_name("path").index(1).required(true)) .arg(Arg::with_name("line").index(2).required(true)) .arg(Arg::with_name("column").index(3).required(true)) - ); + ).get_matches(); + + if let Some(matches) = matches.subcommand_matches("generate") { + let code = generate::generate_parser_for_grammar(String::new())?; + println!("{}", code); + } + + Ok(()) } diff --git a/src/nfa.rs b/src/nfa.rs index 55aa11dc..22cb2a2e 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -7,13 +7,14 @@ pub enum CharacterSet { Exclude(Vec), } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub enum NfaState { Advance(CharacterSet, u32), Split(u32, u32), Accept, } +#[derive(PartialEq, Eq)] pub struct Nfa { pub states: Vec } diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs index dcb8f916..85f37c80 100644 --- a/src/prepare_grammar/expand_repeats.rs +++ b/src/prepare_grammar/expand_repeats.rs @@ -3,7 +3,7 @@ use crate::grammars::{Variable, VariableType}; use std::collections::HashMap; use std::mem; use std::rc::Rc; -use super::ExtractedGrammar; +use super::ExtractedSyntaxGrammar; struct Expander { variable_name: String, @@ -25,16 +25,11 @@ impl Expander { fn expand_rule(&mut self, rule: &Rule) -> Rule { match rule { - Rule::Choice { elements } => - Rule::Choice { - elements: elements.iter().map(|element| self.expand_rule(element)).collect() - }, + Rule::Choice(elements) 
=> + Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()), - Rule::Seq { left, right } => - Rule::Seq { - left: Rc::new(self.expand_rule(left)), - right: Rc::new(self.expand_rule(right)), - }, + Rule::Seq(elements) => + Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()), Rule::Repeat(content) => { let inner_rule = self.expand_rule(content); @@ -46,27 +41,24 @@ impl Expander { self.repeat_count_in_variable += 1; let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable); let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len()); - let rc_symbol = Rc::new(Rule::Symbol(repeat_symbol)); self.existing_repeats.insert(inner_rule.clone(), repeat_symbol); self.auxiliary_variables.push(Variable { name: rule_name, kind: VariableType::Auxiliary, - rule: Rule::Choice { - elements: vec![ - Rule::Seq { - left: rc_symbol.clone(), - right: rc_symbol - }, - inner_rule - ], - }, + rule: Rule::Choice(vec![ + Rule::Seq(vec![ + Rule::Symbol(repeat_symbol), + Rule::Symbol(repeat_symbol), + ]), + inner_rule + ]), }); Rule::Symbol(repeat_symbol) } Rule::Metadata { rule, params } => Rule::Metadata { - rule: Rc::new(self.expand_rule(rule)), + rule: Box::new(self.expand_rule(rule)), params: params.clone() }, @@ -75,7 +67,7 @@ impl Expander { } } -pub(super) fn expand_repeats(mut grammar: ExtractedGrammar) -> ExtractedGrammar { +pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar { let mut expander = Expander { variable_name: String::new(), repeat_count_in_variable: 0, @@ -207,8 +199,8 @@ mod tests { ]); } - fn build_grammar(variables: Vec) -> ExtractedGrammar { - ExtractedGrammar { + fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { + ExtractedSyntaxGrammar { variables, extra_tokens: Vec::new(), external_tokens: Vec::new(), diff --git a/src/prepare_grammar/normalize_rules.rs b/src/prepare_grammar/expand_tokens.rs 
similarity index 61% rename from src/prepare_grammar/normalize_rules.rs rename to src/prepare_grammar/expand_tokens.rs index 67177b4f..9cfa819f 100644 --- a/src/prepare_grammar/normalize_rules.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -1,10 +1,11 @@ use crate::error::{Error, Result}; use crate::rules::Rule; -use crate::grammars::LexicalGrammar; -use crate::nfa::{Nfa, NfaState, NfaCursor, CharacterSet}; +use crate::grammars::{LexicalGrammar, LexicalVariable}; +use crate::nfa::{Nfa, NfaState, CharacterSet}; +use super::{ExtractedLexicalGrammar}; use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind}; -fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet { +fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet { match item { ClassPerlKind::Digit => CharacterSet::empty() .add_range('0', '9'), @@ -21,7 +22,7 @@ fn evaluate_perl_class(item: &ClassPerlKind) -> CharacterSet { } } -fn evaluate_character_class(item: &ClassSetItem) -> Result { +fn expand_character_class(item: &ClassSetItem) -> Result { match item { ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), @@ -29,7 +30,7 @@ fn evaluate_character_class(item: &ClassSetItem) -> Result { ClassSetItem::Union(union) => { let mut result = CharacterSet::empty(); for item in &union.items { - result = result.add(evaluate_character_class(&item)?); + result = result.add(expand_character_class(&item)?); } Ok(result) } @@ -37,7 +38,7 @@ fn evaluate_character_class(item: &ClassSetItem) -> Result { } } -fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { +fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { match ast { Ast::Empty(_) => Ok(()), Ast::Flags(_) => Err(Error::regex("Flags are not supported")), @@ -53,12 +54,12 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( 
Ast::Class(class) => match class { Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")), Class::Perl(class) => { - nfa.states.push(NfaState::Advance(evaluate_perl_class(&class.kind), next_state_index)); + nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index)); Ok(()) }, Class::Bracketed(class) => match &class.kind { ClassSet::Item(item) => { - let character_set = evaluate_character_class(&item)?; + let character_set = expand_character_class(&item)?; nfa.states.push(NfaState::Advance(character_set, next_state_index)); Ok(()) }, @@ -69,14 +70,14 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( }, Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { - regex_to_nfa(&repetition.ast, nfa, next_state_index)?; + expand_regex(&repetition.ast, nfa, next_state_index)?; nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index)); Ok(()) }, RepetitionKind::OneOrMore => { nfa.states.push(NfaState::Accept); // Placeholder for split let split_index = nfa.start_index(); - regex_to_nfa(&repetition.ast, nfa, split_index)?; + expand_regex(&repetition.ast, nfa, split_index)?; nfa.states[split_index as usize] = NfaState::Split( nfa.start_index(), next_state_index @@ -86,7 +87,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( RepetitionKind::ZeroOrMore => { nfa.states.push(NfaState::Accept); // Placeholder for split let split_index = nfa.start_index(); - regex_to_nfa(&repetition.ast, nfa, split_index)?; + expand_regex(&repetition.ast, nfa, split_index)?; nfa.states[split_index as usize] = NfaState::Split( nfa.start_index(), next_state_index @@ -96,11 +97,11 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( }, RepetitionKind::Range(_) => unimplemented!(), }, - Ast::Group(group) => regex_to_nfa(&group.ast, nfa, nfa.start_index()), + Ast::Group(group) => 
expand_regex(&group.ast, nfa, nfa.start_index()), Ast::Alternation(alternation) => { let mut alternative_start_indices = Vec::new(); for ast in alternation.asts.iter() { - regex_to_nfa(&ast, nfa, next_state_index)?; + expand_regex(&ast, nfa, next_state_index)?; alternative_start_indices.push(nfa.start_index()); } alternative_start_indices.pop(); @@ -111,7 +112,7 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( }, Ast::Concat(concat) => { for ast in concat.asts.iter().rev() { - regex_to_nfa(&ast, nfa, next_state_index)?; + expand_regex(&ast, nfa, next_state_index)?; next_state_index = nfa.start_index(); } Ok(()) @@ -119,32 +120,77 @@ fn regex_to_nfa(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( } } -fn expand_rule(rule: Rule) -> Result { +fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { match rule { Rule::Pattern(s) => { let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?; - let mut nfa = Nfa::new(); - regex_to_nfa(&ast, &mut nfa, 0)?; - Ok(nfa) + expand_regex(&ast, nfa, next_state_index)?; + Ok(()) }, Rule::String(s) => { - let mut nfa = Nfa::new(); for c in s.chars().rev() { nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index)); } - Ok(nfa) + Ok(()) + }, + Rule::Choice(elements) => { + let mut alternative_start_indices = Vec::new(); + for element in elements { + expand_rule(element, nfa, next_state_index)?; + alternative_start_indices.push(nfa.start_index()); + } + alternative_start_indices.pop(); + for alternative_start_index in alternative_start_indices { + nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index)); + } + Ok(()) + }, + Rule::Seq(elements) => { + for element in elements.into_iter().rev() { + expand_rule(element, nfa, next_state_index)?; + next_state_index = nfa.start_index(); + } + Ok(()) + }, + Rule::Repeat(rule) => { + nfa.states.push(NfaState::Accept); // 
Placeholder for split + let split_index = nfa.start_index(); + expand_rule(*rule, nfa, split_index)?; + nfa.states[split_index as usize] = NfaState::Split( + nfa.start_index(), + next_state_index + ); + Ok(()) }, _ => Err(Error::grammar("Unexpected rule type")), } } -pub(super) fn normalize_rules(grammar: LexicalGrammar) -> LexicalGrammar { - unimplemented!(); +pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result { + let mut variables = Vec::new(); + for variable in grammar.variables { + let mut nfa = Nfa::new(); + expand_rule(variable.rule, &mut nfa, 0)?; + variables.push(LexicalVariable { + name: variable.name, + kind: variable.kind, + nfa, + }); + } + let mut separators = Vec::new(); + for separator in grammar.separators { + let mut nfa = Nfa::new(); + expand_rule(separator, &mut nfa, 0)?; + separators.push(nfa); + } + + Ok(LexicalGrammar { variables, separators }) } #[cfg(test)] mod tests { use super::*; + use crate::nfa::NfaCursor; fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> { let mut result = None; @@ -164,15 +210,15 @@ mod tests { } #[test] - fn test_regex_expansion() { + fn test_rule_expansion() { struct Row { - pattern: &'static str, + rule: Rule, examples: Vec<(&'static str, Option<&'static str>)>, } let table = [ Row { - pattern: "a|bc", + rule: Rule::pattern("a|bc"), examples: vec![ ("a12", Some("a")), ("bc12", Some("bc")), @@ -181,7 +227,7 @@ mod tests { ], }, Row { - pattern: "(a|b|c)d(e|f|g)h?", + rule: Rule::pattern("(a|b|c)d(e|f|g)h?"), examples: vec![ ("ade1", Some("ade")), ("bdf1", Some("bdf")), @@ -190,14 +236,14 @@ mod tests { ], }, Row { - pattern: "a*", + rule: Rule::pattern("a*"), examples: vec![ ("aaa1", Some("aaa")), ("b", Some("")), ], }, Row { - pattern: "a((bc)+|(de)*)f", + rule: Rule::pattern("a((bc)+|(de)*)f"), examples: vec![ ("af1", Some("af")), ("adedef1", Some("adedef")), @@ -206,21 +252,41 @@ mod tests { ], }, Row { - pattern: "[a-fA-F0-9]+", + rule: Rule::pattern("[a-fA-F0-9]+"), examples: 
vec![ ("A1ff0", Some("A1ff")), ], }, Row { - pattern: "\\w\\d\\s", + rule: Rule::pattern("\\w\\d\\s"), examples: vec![ ("_0 ", Some("_0 ")), ], }, + Row { + rule: Rule::string("abc"), + examples: vec![ + ("abcd", Some("abc")), + ("ab", None), + ], + }, + Row { + rule: Rule::repeat(Rule::seq(vec![ + Rule::string("{"), + Rule::pattern("[a-f]+"), + Rule::string("}"), + ])), + examples: vec![ + ("{a}{", Some("{a}")), + ("{a}{d", Some("{a}")), + ("ab", None), + ], + }, ]; - for Row { pattern, examples } in table.iter() { - let nfa = expand_rule(Rule::pattern(pattern)).unwrap(); + for Row { rule, examples } in table.iter() { + let mut nfa = Nfa::new(); + expand_rule(rule.clone(), &mut nfa, 0).unwrap(); for (haystack, needle) in examples.iter() { assert_eq!(simulate_nfa(&nfa, haystack), *needle); } diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs index 250246f3..2a175242 100644 --- a/src/prepare_grammar/extract_simple_aliases.rs +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -1,6 +1,5 @@ use crate::rules::AliasMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; -use super::ExtractedGrammar; pub(super) fn extract_simple_aliases( syntax_grammar: &mut SyntaxGrammar, diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs index ee90b3c8..7322516f 100644 --- a/src/prepare_grammar/extract_tokens.rs +++ b/src/prepare_grammar/extract_tokens.rs @@ -3,12 +3,12 @@ use std::rc::Rc; use std::mem; use crate::error::{Error, Result}; use crate::rules::{Rule, MetadataParams, Symbol, SymbolType}; -use crate::grammars::{Variable, VariableType, LexicalGrammar, ExternalToken}; -use super::{InternedGrammar, ExtractedGrammar}; +use crate::grammars::{Variable, ExternalToken}; +use super::{InternedGrammar, ExtractedSyntaxGrammar, ExtractedLexicalGrammar}; pub(super) fn extract_tokens( mut grammar: InternedGrammar -) -> Result<(ExtractedGrammar, LexicalGrammar)> { +) -> 
Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> { let mut extractor = TokenExtractor { current_variable_name: String::new(), current_variable_token_count: 0, @@ -138,7 +138,7 @@ pub(super) fn extract_tokens( } Ok(( - ExtractedGrammar { + ExtractedSyntaxGrammar { variables, expected_conflicts, extra_tokens, @@ -146,7 +146,7 @@ pub(super) fn extract_tokens( external_tokens, word_token, }, - LexicalGrammar { + ExtractedLexicalGrammar { variables: lexical_variables, separators, } @@ -198,20 +198,19 @@ impl TokenExtractor { } else { Rule::Metadata { params: params.clone(), - rule: Rc::new(self.extract_tokens_in_rule((&rule).clone())) + rule: Box::new(self.extract_tokens_in_rule((&rule).clone())) } } }, Rule::Repeat(content) => Rule::Repeat( - Rc::new(self.extract_tokens_in_rule(content)) + Box::new(self.extract_tokens_in_rule(content)) + ), + Rule::Seq(elements) => Rule::Seq( + elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect() + ), + Rule::Choice(elements) => Rule::Choice( + elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect() ), - Rule::Seq { left, right } => Rule::Seq { - left: Rc::new(self.extract_tokens_in_rule(left)), - right: Rc::new(self.extract_tokens_in_rule(right)), - }, - Rule::Choice { elements } => Rule::Choice { - elements: elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect() - }, _ => input.clone() } } @@ -249,19 +248,18 @@ impl SymbolReplacer { fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule { match rule { Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(), - Rule::Choice { elements } => Rule::Choice { - elements: elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect() - }, - Rule::Seq { left, right } => Rule::Seq { - left: Rc::new(self.replace_symbols_in_rule(left)), - right: Rc::new(self.replace_symbols_in_rule(right)), - }, + Rule::Choice(elements) => Rule::Choice( + elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect() + ), + Rule::Seq(elements) => 
Rule::Seq( + elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect() + ), Rule::Repeat(content) => Rule::Repeat( - Rc::new(self.replace_symbols_in_rule(content)) + Box::new(self.replace_symbols_in_rule(content)) ), Rule::Metadata { rule, params } => Rule::Metadata { params: params.clone(), - rule: Rc::new(self.replace_symbols_in_rule(rule)), + rule: Box::new(self.replace_symbols_in_rule(rule)), }, _ => rule.clone() } @@ -290,6 +288,7 @@ impl SymbolReplacer { #[cfg(test)] mod test { use super::*; + use crate::grammars::VariableType; #[test] fn test_extraction() { diff --git a/src/prepare_grammar/flatten_grammar.rs b/src/prepare_grammar/flatten_grammar.rs index 36fe76c9..0f09cd14 100644 --- a/src/prepare_grammar/flatten_grammar.rs +++ b/src/prepare_grammar/flatten_grammar.rs @@ -1,7 +1,7 @@ use crate::error::Result; use crate::grammars::SyntaxGrammar; -use super::ExtractedGrammar; +use super::ExtractedSyntaxGrammar; -pub(super) fn flatten_grammar(grammar: ExtractedGrammar) -> Result { +pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result { unimplemented!(); } diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs index e4cf7ff1..17132262 100644 --- a/src/prepare_grammar/intern_symbols.rs +++ b/src/prepare_grammar/intern_symbols.rs @@ -80,26 +80,26 @@ struct Interner<'a> { impl<'a> Interner<'a> { fn intern_rule(&self, rule: &Rule) -> Result { match rule { - Rule::Choice { elements } => { + Rule::Choice(elements) => { let mut result = Vec::with_capacity(elements.len()); for element in elements { result.push(self.intern_rule(element)?); } - Ok(Rule::Choice { elements: result }) + Ok(Rule::Choice(result)) }, - - Rule::Seq { left, right } => - Ok(Rule::Seq { - left: Rc::new(self.intern_rule(left)?), - right: Rc::new(self.intern_rule(right)?), - }), - - Rule::Repeat(content) => - Ok(Rule::Repeat(Rc::new(self.intern_rule(content)?))), - + Rule::Seq(elements) => { + let mut result = 
Vec::with_capacity(elements.len()); + for element in elements { + result.push(self.intern_rule(element)?); + } + Ok(Rule::Seq(result)) + }, + Rule::Repeat(content) => Ok(Rule::Repeat( + Box::new(self.intern_rule(content)?) + )), Rule::Metadata { rule, params } => Ok(Rule::Metadata { - rule: Rc::new(self.intern_rule(rule)?), + rule: Box::new(self.intern_rule(rule)?), params: params.clone() }), diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index b860807a..e2615479 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -2,7 +2,7 @@ mod intern_symbols; mod extract_tokens; mod expand_repeats; mod flatten_grammar; -mod normalize_rules; +mod expand_tokens; mod extract_simple_aliases; use crate::rules::{AliasMap, Rule, Symbol}; @@ -12,7 +12,7 @@ use self::intern_symbols::intern_symbols; use self::extract_tokens::extract_tokens; use self::expand_repeats::expand_repeats; use self::flatten_grammar::flatten_grammar; -use self::normalize_rules::normalize_rules; +use self::expand_tokens::expand_tokens; use self::extract_simple_aliases::extract_simple_aliases; pub(self) struct IntermediateGrammar { @@ -25,7 +25,14 @@ pub(self) struct IntermediateGrammar { } pub(self) type InternedGrammar = IntermediateGrammar; -pub(self) type ExtractedGrammar = IntermediateGrammar; + +pub(self) type ExtractedSyntaxGrammar = IntermediateGrammar; + +#[derive(Debug, PartialEq, Eq)] +pub(self) struct ExtractedLexicalGrammar { + variables: Vec, + separators: Vec, +} pub(crate) fn prepare_grammar( input_grammar: &InputGrammar @@ -34,7 +41,7 @@ pub(crate) fn prepare_grammar( let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; - let mut lexical_grammar = normalize_rules(lexical_grammar); + let mut lexical_grammar = expand_tokens(lexical_grammar)?; let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut 
lexical_grammar); Ok((syntax_grammar, lexical_grammar, simple_aliases)) } diff --git a/src/rules.rs b/src/rules.rs index b593496a..c6f18cf4 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -49,18 +49,13 @@ pub(crate) enum Rule { Pattern(String), NamedSymbol(String), Symbol(Symbol), - Choice { - elements: Vec, - }, + Choice(Vec), Metadata { params: MetadataParams, - rule: Rc, + rule: Box, }, - Repeat(Rc), - Seq { - left: Rc, - right: Rc, - } + Repeat(Box), + Seq(Vec), } impl Rule { @@ -98,7 +93,7 @@ impl Rule { } pub fn repeat(rule: Rule) -> Self { - Rule::Repeat(Rc::new(rule)) + Rule::Repeat(Box::new(rule)) } pub fn choice(rules: Vec) -> Self { @@ -106,32 +101,11 @@ impl Rule { for rule in rules { choice_helper(&mut elements, rule); } - Rule::Choice { elements } + Rule::Choice(elements) } pub fn seq(rules: Vec) -> Self { - let mut result = Rule::Blank; - for rule in rules { - match rule { - Rule::Blank => continue, - Rule::Metadata { rule, params: _ } => { - if *rule == Rule::Blank { - continue; - } - }, - _ => { - if result == Rule::Blank { - result = rule; - } else { - result = Rule::Seq { - left: Rc::new(result), - right: Rc::new(rule), - } - } - } - } - } - result + Rule::Seq(rules) } pub fn terminal(index: usize) -> Self { @@ -196,14 +170,14 @@ fn add_metadata(input: Rule, f: T) -> Rule { _ => { let mut params = MetadataParams::default(); f(&mut params); - Rule::Metadata { rule: Rc::new(input), params } + Rule::Metadata { rule: Box::new(input), params } } } } fn choice_helper(result: &mut Vec, rule: Rule) { match rule { - Rule::Choice {elements} => { + Rule::Choice(elements) => { for element in elements { choice_helper(result, element); } From b0a7c854a4939915703980c229093e70147a1615 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 10 Dec 2018 14:57:46 -0800 Subject: [PATCH 053/208] Avoid redundant regex complication when instantiating PropertySheets --- src/lib.rs | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 
deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 724a08bd..d70dc607 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,7 +48,7 @@ pub struct InputEdit { struct PropertyTransition { state_id: usize, child_index: Option, - text_regex: Option, + text_regex_index: Option, } struct PropertyState { @@ -66,6 +66,7 @@ pub enum PropertySheetError { pub struct PropertySheet> { states: Vec, property_sets: Vec

, + text_regexes: Vec, } pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -615,11 +616,11 @@ impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { .get(&node_kind_id) .and_then(|transitions| { for transition in transitions.iter() { - if let Some(text_regex) = transition.text_regex.as_ref() { + if let Some(text_regex_index) = transition.text_regex_index { let node = self.cursor.node(); let text = &self.source.as_bytes()[node.start_byte()..node.end_byte()]; if let Ok(text) = str::from_utf8(text) { - if !text_regex.is_match(text) { + if !self.property_sheet.text_regexes[text_regex_index].is_match(text) { continue; } } @@ -699,28 +700,37 @@ impl PropertySheet

{ } let input: PropertySheetJSON

= serde_json::from_str(json) - .map_err(|e| PropertySheetError::InvalidJSON(e))?; + .map_err(PropertySheetError::InvalidJSON)?; let mut states = Vec::new(); + let mut text_regexes = Vec::new(); + let mut text_regex_patterns = Vec::new(); for state in input.states.iter() { let mut transitions = HashMap::new(); let node_kind_count = language.node_kind_count(); for transition in state.transitions.iter() { - for i in 0..node_kind_count { - let i = i as u16; - if language.node_kind_is_named(i) == transition.named - && transition.kind == language.node_kind_for_id(i) + let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() { + if let Some(index) = text_regex_patterns.iter().position(|r| *r == regex_pattern) { + Some(index) + } else { + text_regex_patterns.push(regex_pattern); + text_regexes.push(Regex::new(®ex_pattern).map_err(PropertySheetError::InvalidRegex)?); + Some(text_regexes.len() - 1) + } + } else { + None + }; + + for i in 0..(node_kind_count as u16) { + if + transition.kind == language.node_kind_for_id(i) && + transition.named == language.node_kind_is_named(i) { let entry = transitions.entry(i).or_insert(Vec::new()); - let text_regex = if let Some(text) = transition.text.as_ref() { - Some(Regex::new(&text).map_err(|e| PropertySheetError::InvalidRegex(e))?) - } else { - None - }; entry.push(PropertyTransition { child_index: transition.index, state_id: transition.state_id, - text_regex + text_regex_index, }); } } @@ -734,6 +744,7 @@ impl PropertySheet

{ Ok(Self { property_sets: input.property_sets, states, + text_regexes, }) } } From ba9da0a9b48dd7d374438eece53749061453fefe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 10:35:03 -0800 Subject: [PATCH 054/208] 0.3.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f61b1583..fde4fd31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.3" +version = "0.3.4" authors = ["Max Brunsfeld "] build = "build.rs" license = "MIT" From 7acfb2b74e5ba3d66aff67d9afb698add9cb8708 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 12:14:34 -0800 Subject: [PATCH 055/208] Implement flatten_grammar --- src/grammars.rs | 19 +- src/prepare_grammar/expand_repeats.rs | 221 +++++++++-------- src/prepare_grammar/expand_tokens.rs | 119 +++++---- src/prepare_grammar/extract_tokens.rs | 327 +++++++++++++------------ src/prepare_grammar/flatten_grammar.rs | 312 ++++++++++++++++++++++- src/prepare_grammar/intern_symbols.rs | 137 ++++++----- src/prepare_grammar/mod.rs | 20 +- src/rules.rs | 8 +- 8 files changed, 773 insertions(+), 390 deletions(-) diff --git a/src/grammars.rs b/src/grammars.rs index c5e9aaa1..3b3d47f7 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -52,7 +52,6 @@ pub(crate) struct ProductionStep { pub precedence: i32, pub associativity: Option, pub alias: Option, - pub is_excluded: bool, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -65,6 +64,7 @@ pub(crate) struct Production { pub(crate) struct SyntaxVariable { pub name: String, pub kind: VariableType, + pub productions: Vec, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -81,7 +81,22 @@ pub(crate) struct SyntaxGrammar { pub expected_conflicts: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, - pub word_token: Symbol, + pub word_token: Option, +} + +impl ProductionStep { + pub(crate) fn new(symbol: 
Symbol) -> Self { + Self { symbol, precedence: 0, associativity: None, alias: None } + } + + pub(crate) fn with_prec(self, precedence: i32, associativity: Option) -> Self { + Self { + symbol: self.symbol, + precedence, + associativity, + alias: self.alias, + } + } } impl Variable { diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs index 85f37c80..f3811c5f 100644 --- a/src/prepare_grammar/expand_repeats.rs +++ b/src/prepare_grammar/expand_repeats.rs @@ -1,16 +1,15 @@ -use crate::rules::{Rule, Symbol}; +use super::ExtractedSyntaxGrammar; use crate::grammars::{Variable, VariableType}; +use crate::rules::{Rule, Symbol}; use std::collections::HashMap; use std::mem; -use std::rc::Rc; -use super::ExtractedSyntaxGrammar; struct Expander { variable_name: String, repeat_count_in_variable: usize, preceding_symbol_count: usize, auxiliary_variables: Vec, - existing_repeats: HashMap + existing_repeats: HashMap, } impl Expander { @@ -25,11 +24,19 @@ impl Expander { fn expand_rule(&mut self, rule: &Rule) -> Rule { match rule { - Rule::Choice(elements) => - Rule::Choice(elements.iter().map(|element| self.expand_rule(element)).collect()), + Rule::Choice(elements) => Rule::Choice( + elements + .iter() + .map(|element| self.expand_rule(element)) + .collect(), + ), - Rule::Seq(elements) => - Rule::Seq(elements.iter().map(|element| self.expand_rule(element)).collect()), + Rule::Seq(elements) => Rule::Seq( + elements + .iter() + .map(|element| self.expand_rule(element)) + .collect(), + ), Rule::Repeat(content) => { let inner_rule = self.expand_rule(content); @@ -39,9 +46,15 @@ impl Expander { } self.repeat_count_in_variable += 1; - let rule_name = format!("{}_repeat{}", self.variable_name, self.repeat_count_in_variable); - let repeat_symbol = Symbol::non_terminal(self.preceding_symbol_count + self.auxiliary_variables.len()); - self.existing_repeats.insert(inner_rule.clone(), repeat_symbol); + let rule_name = format!( + "{}_repeat{}", + 
self.variable_name, self.repeat_count_in_variable + ); + let repeat_symbol = Symbol::non_terminal( + self.preceding_symbol_count + self.auxiliary_variables.len(), + ); + self.existing_repeats + .insert(inner_rule.clone(), repeat_symbol); self.auxiliary_variables.push(Variable { name: rule_name, kind: VariableType::Auxiliary, @@ -50,7 +63,7 @@ impl Expander { Rule::Symbol(repeat_symbol), Rule::Symbol(repeat_symbol), ]), - inner_rule + inner_rule, ]), }); @@ -59,10 +72,10 @@ impl Expander { Rule::Metadata { rule, params } => Rule::Metadata { rule: Box::new(self.expand_rule(rule)), - params: params.clone() + params: params.clone(), }, - _ => rule.clone() + _ => rule.clone(), } } } @@ -80,7 +93,9 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy expander.expand_variable(&mut variable); } - grammar.variables.extend(expander.auxiliary_variables.into_iter()); + grammar + .variables + .extend(expander.auxiliary_variables.into_iter()); grammar } @@ -91,112 +106,126 @@ mod tests { #[test] fn test_basic_repeat_expansion() { // Repeats nested inside of sequences and choices are expanded. 
- let grammar = expand_repeats(build_grammar(vec![ - Variable::named("rule0", Rule::seq(vec![ + let grammar = expand_repeats(build_grammar(vec![Variable::named( + "rule0", + Rule::seq(vec![ Rule::terminal(10), Rule::choice(vec![ Rule::repeat(Rule::terminal(11)), Rule::repeat(Rule::terminal(12)), ]), Rule::terminal(13), - ])), - ])); + ]), + )])); - assert_eq!(grammar.variables, vec![ - Variable::named("rule0", Rule::seq(vec![ - Rule::terminal(10), - Rule::choice(vec![ - Rule::non_terminal(1), - Rule::non_terminal(2), - ]), - Rule::terminal(13), - ])), - Variable::auxiliary("rule0_repeat1", Rule::choice(vec![ - Rule::seq(vec![ - Rule::non_terminal(1), - Rule::non_terminal(1), - ]), - Rule::terminal(11), - ])), - Variable::auxiliary("rule0_repeat2", Rule::choice(vec![ - Rule::seq(vec![ - Rule::non_terminal(2), - Rule::non_terminal(2), - ]), - Rule::terminal(12), - ])), - ]); + assert_eq!( + grammar.variables, + vec![ + Variable::named( + "rule0", + Rule::seq(vec![ + Rule::terminal(10), + Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]), + Rule::terminal(13), + ]) + ), + Variable::auxiliary( + "rule0_repeat1", + Rule::choice(vec![ + Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]), + Rule::terminal(11), + ]) + ), + Variable::auxiliary( + "rule0_repeat2", + Rule::choice(vec![ + Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]), + Rule::terminal(12), + ]) + ), + ] + ); } #[test] fn test_repeat_deduplication() { // Terminal 4 appears inside of a repeat in three different places. 
let grammar = expand_repeats(build_grammar(vec![ - Variable::named("rule0", Rule::choice(vec![ - Rule::seq(vec![ Rule::terminal(1), Rule::repeat(Rule::terminal(4)) ]), - Rule::seq(vec![ Rule::terminal(2), Rule::repeat(Rule::terminal(4)) ]), - ])), - Variable::named("rule1", Rule::seq(vec![ - Rule::terminal(3), - Rule::repeat(Rule::terminal(4)), - ])), + Variable::named( + "rule0", + Rule::choice(vec![ + Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]), + Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]), + ]), + ), + Variable::named( + "rule1", + Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]), + ), ])); // Only one auxiliary rule is created for repeating terminal 4. - assert_eq!(grammar.variables, vec![ - Variable::named("rule0", Rule::choice(vec![ - Rule::seq(vec![ Rule::terminal(1), Rule::non_terminal(2) ]), - Rule::seq(vec![ Rule::terminal(2), Rule::non_terminal(2) ]), - ])), - Variable::named("rule1", Rule::seq(vec![ - Rule::terminal(3), - Rule::non_terminal(2), - ])), - Variable::auxiliary("rule0_repeat1", Rule::choice(vec![ - Rule::seq(vec![ - Rule::non_terminal(2), - Rule::non_terminal(2), - ]), - Rule::terminal(4), - ])) - ]); + assert_eq!( + grammar.variables, + vec![ + Variable::named( + "rule0", + Rule::choice(vec![ + Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]), + Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]), + ]) + ), + Variable::named( + "rule1", + Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),]) + ), + Variable::auxiliary( + "rule0_repeat1", + Rule::choice(vec![ + Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]), + Rule::terminal(4), + ]) + ) + ] + ); } #[test] fn test_expansion_of_nested_repeats() { - let grammar = expand_repeats(build_grammar(vec![ - Variable::named("rule0", Rule::seq(vec![ + let grammar = expand_repeats(build_grammar(vec![Variable::named( + "rule0", + Rule::seq(vec![ Rule::terminal(10), Rule::repeat(Rule::seq(vec![ 
Rule::terminal(11), - Rule::repeat(Rule::terminal(12)) + Rule::repeat(Rule::terminal(12)), ])), - ])), - ])); + ]), + )])); - assert_eq!(grammar.variables, vec![ - Variable::named("rule0", Rule::seq(vec![ - Rule::terminal(10), - Rule::non_terminal(2), - ])), - Variable::auxiliary("rule0_repeat1", Rule::choice(vec![ - Rule::seq(vec![ - Rule::non_terminal(1), - Rule::non_terminal(1), - ]), - Rule::terminal(12), - ])), - Variable::auxiliary("rule0_repeat2", Rule::choice(vec![ - Rule::seq(vec![ - Rule::non_terminal(2), - Rule::non_terminal(2), - ]), - Rule::seq(vec![ - Rule::terminal(11), - Rule::non_terminal(1), - ]), - ])), - ]); + assert_eq!( + grammar.variables, + vec![ + Variable::named( + "rule0", + Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),]) + ), + Variable::auxiliary( + "rule0_repeat1", + Rule::choice(vec![ + Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]), + Rule::terminal(12), + ]) + ), + Variable::auxiliary( + "rule0_repeat2", + Rule::choice(vec![ + Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]), + Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]), + ]) + ), + ] + ); } fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 9cfa819f..e0e1f9a9 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -1,14 +1,13 @@ +use super::ExtractedLexicalGrammar; use crate::error::{Error, Result}; -use crate::rules::Rule; use crate::grammars::{LexicalGrammar, LexicalVariable}; -use crate::nfa::{Nfa, NfaState, CharacterSet}; -use super::{ExtractedLexicalGrammar}; +use crate::nfa::{CharacterSet, Nfa, NfaState}; +use crate::rules::Rule; use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind}; fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet { match item { - ClassPerlKind::Digit => CharacterSet::empty() - .add_range('0', 
'9'), + ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'), ClassPerlKind::Space => CharacterSet::empty() .add_char(' ') .add_char('\t') @@ -18,7 +17,7 @@ fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet { .add_char('_') .add_range('A', 'Z') .add_range('a', 'z') - .add_range('0', '9') + .add_range('0', '9'), } } @@ -26,7 +25,9 @@ fn expand_character_class(item: &ClassSetItem) -> Result { match item { ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), - ClassSetItem::Range(range) => Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)), + ClassSetItem::Range(range) => { + Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)) + } ClassSetItem::Union(union) => { let mut result = CharacterSet::empty(); for item in &union.items { @@ -43,58 +44,64 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( Ast::Empty(_) => Ok(()), Ast::Flags(_) => Err(Error::regex("Flags are not supported")), Ast::Literal(literal) => { - nfa.states.push(NfaState::Advance(CharacterSet::Include(vec![literal.c]), next_state_index)); + nfa.states.push(NfaState::Advance( + CharacterSet::Include(vec![literal.c]), + next_state_index, + )); Ok(()) - }, + } Ast::Dot(_) => { - nfa.states.push(NfaState::Advance(CharacterSet::Exclude(vec!['\n']), next_state_index)); + nfa.states.push(NfaState::Advance( + CharacterSet::Exclude(vec!['\n']), + next_state_index, + )); Ok(()) - }, + } Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")), Ast::Class(class) => match class { Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")), Class::Perl(class) => { - nfa.states.push(NfaState::Advance(expand_perl_character_class(&class.kind), next_state_index)); + nfa.states.push(NfaState::Advance( + expand_perl_character_class(&class.kind), + next_state_index, + )); Ok(()) - }, + } 
Class::Bracketed(class) => match &class.kind { ClassSet::Item(item) => { let character_set = expand_character_class(&item)?; - nfa.states.push(NfaState::Advance(character_set, next_state_index)); + nfa.states + .push(NfaState::Advance(character_set, next_state_index)); Ok(()) - }, - ClassSet::BinaryOp(_) => { - Err(Error::regex("Binary operators in character classes aren't supported")) } - } + ClassSet::BinaryOp(_) => Err(Error::regex( + "Binary operators in character classes aren't supported", + )), + }, }, Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { expand_regex(&repetition.ast, nfa, next_state_index)?; nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index)); Ok(()) - }, + } RepetitionKind::OneOrMore => { nfa.states.push(NfaState::Accept); // Placeholder for split let split_index = nfa.start_index(); expand_regex(&repetition.ast, nfa, split_index)?; - nfa.states[split_index as usize] = NfaState::Split( - nfa.start_index(), - next_state_index - ); + nfa.states[split_index as usize] = + NfaState::Split(nfa.start_index(), next_state_index); Ok(()) - }, + } RepetitionKind::ZeroOrMore => { nfa.states.push(NfaState::Accept); // Placeholder for split let split_index = nfa.start_index(); expand_regex(&repetition.ast, nfa, split_index)?; - nfa.states[split_index as usize] = NfaState::Split( - nfa.start_index(), - next_state_index - ); + nfa.states[split_index as usize] = + NfaState::Split(nfa.start_index(), next_state_index); nfa.prepend(|start_index| NfaState::Split(start_index, next_state_index)); Ok(()) - }, + } RepetitionKind::Range(_) => unimplemented!(), }, Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.start_index()), @@ -109,7 +116,7 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index)); } Ok(()) - }, + } Ast::Concat(concat) => { for ast in concat.asts.iter().rev() { expand_regex(&ast, 
nfa, next_state_index)?; @@ -123,16 +130,20 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { match rule { Rule::Pattern(s) => { - let ast = parse::Parser::new().parse(&s).map_err(|e| Error::GrammarError(e.to_string()))?; + let ast = parse::Parser::new() + .parse(&s) + .map_err(|e| Error::GrammarError(e.to_string()))?; expand_regex(&ast, nfa, next_state_index)?; Ok(()) - }, + } Rule::String(s) => { for c in s.chars().rev() { - nfa.prepend(|start_index| NfaState::Advance(CharacterSet::empty().add_char(c), start_index)); + nfa.prepend(|start_index| { + NfaState::Advance(CharacterSet::empty().add_char(c), start_index) + }); } Ok(()) - }, + } Rule::Choice(elements) => { let mut alternative_start_indices = Vec::new(); for element in elements { @@ -144,24 +155,21 @@ fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index)); } Ok(()) - }, + } Rule::Seq(elements) => { for element in elements.into_iter().rev() { expand_rule(element, nfa, next_state_index)?; next_state_index = nfa.start_index(); } Ok(()) - }, + } Rule::Repeat(rule) => { nfa.states.push(NfaState::Accept); // Placeholder for split let split_index = nfa.start_index(); expand_rule(*rule, nfa, split_index)?; - nfa.states[split_index as usize] = NfaState::Split( - nfa.start_index(), - next_state_index - ); + nfa.states[split_index as usize] = NfaState::Split(nfa.start_index(), next_state_index); Ok(()) - }, + } _ => Err(Error::grammar("Unexpected rule type")), } } @@ -184,7 +192,10 @@ pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> { let mut extractor = TokenExtractor { current_variable_name: String::new(), @@ -40,9 +39,15 @@ pub(super) fn extract_tokens( // variable in the lexical grammar. 
Symbols that pointed to later variables // will need to have their indices decremented. let mut variables = Vec::new(); - let mut symbol_replacer = SymbolReplacer { replacements: HashMap::new() }; + let mut symbol_replacer = SymbolReplacer { + replacements: HashMap::new(), + }; for (i, variable) in grammar.variables.into_iter().enumerate() { - if let Rule::Symbol(Symbol { kind: SymbolType::Terminal, index }) = variable.rule { + if let Rule::Symbol(Symbol { + kind: SymbolType::Terminal, + index, + }) = variable.rule + { if i > 0 && extractor.extracted_usage_counts[index] == 1 { let mut lexical_variable = &mut lexical_variables[index]; lexical_variable.kind = variable.kind; @@ -58,16 +63,19 @@ pub(super) fn extract_tokens( variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule); } - let expected_conflicts = grammar.expected_conflicts + let expected_conflicts = grammar + .expected_conflicts .into_iter() - .map(|conflict| + .map(|conflict| { conflict .iter() .map(|symbol| symbol_replacer.replace_symbol(*symbol)) .collect() - ).collect(); + }) + .collect(); - let variables_to_inline = grammar.variables_to_inline + let variables_to_inline = grammar + .variables_to_inline .into_iter() .map(|symbol| symbol_replacer.replace_symbol(symbol)) .collect(); @@ -149,7 +157,7 @@ pub(super) fn extract_tokens( ExtractedLexicalGrammar { variables: lexical_variables, separators, - } + }, )) } @@ -161,7 +169,7 @@ struct TokenExtractor { } struct SymbolReplacer { - replacements: HashMap + replacements: HashMap, } impl TokenExtractor { @@ -198,20 +206,24 @@ impl TokenExtractor { } else { Rule::Metadata { params: params.clone(), - rule: Box::new(self.extract_tokens_in_rule((&rule).clone())) + rule: Box::new(self.extract_tokens_in_rule((&rule).clone())), } } - }, - Rule::Repeat(content) => Rule::Repeat( - Box::new(self.extract_tokens_in_rule(content)) - ), + } + Rule::Repeat(content) => Rule::Repeat(Box::new(self.extract_tokens_in_rule(content))), Rule::Seq(elements) => 
Rule::Seq( - elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect() + elements + .iter() + .map(|e| self.extract_tokens_in_rule(e)) + .collect(), ), Rule::Choice(elements) => Rule::Choice( - elements.iter().map(|e| self.extract_tokens_in_rule(e)).collect() + elements + .iter() + .map(|e| self.extract_tokens_in_rule(e)) + .collect(), ), - _ => input.clone() + _ => input.clone(), } } @@ -219,7 +231,7 @@ impl TokenExtractor { for (i, variable) in self.extracted_variables.iter_mut().enumerate() { if variable.rule == *rule { self.extracted_usage_counts[i] += 1; - return Symbol::terminal(i) + return Symbol::terminal(i); } } @@ -231,10 +243,9 @@ impl TokenExtractor { Variable::auxiliary( &format!( "{}_token{}", - &self.current_variable_name, - self.current_variable_token_count + &self.current_variable_name, self.current_variable_token_count ), - rule.clone() + rule.clone(), ) }; @@ -249,25 +260,29 @@ impl SymbolReplacer { match rule { Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(), Rule::Choice(elements) => Rule::Choice( - elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect() + elements + .iter() + .map(|e| self.replace_symbols_in_rule(e)) + .collect(), ), Rule::Seq(elements) => Rule::Seq( - elements.iter().map(|e| self.replace_symbols_in_rule(e)).collect() - ), - Rule::Repeat(content) => Rule::Repeat( - Box::new(self.replace_symbols_in_rule(content)) + elements + .iter() + .map(|e| self.replace_symbols_in_rule(e)) + .collect(), ), + Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))), Rule::Metadata { rule, params } => Rule::Metadata { params: params.clone(), rule: Box::new(self.replace_symbols_in_rule(rule)), }, - _ => rule.clone() + _ => rule.clone(), } } fn replace_symbol(&self, symbol: Symbol) -> Symbol { if !symbol.is_non_terminal() { - return symbol + return symbol; } if let Some(replacement) = self.replacements.get(&symbol.index) { @@ -293,81 +308,95 @@ mod test { #[test] fn test_extraction() 
{ let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![ - Variable::named("rule_0", Rule::repeat(Rule::seq(vec![ - Rule::string("a"), - Rule::pattern("b"), - Rule::choice(vec![ - Rule::non_terminal(1), - Rule::non_terminal(2), - Rule::token(Rule::repeat(Rule::choice(vec![ - Rule::string("c"), - Rule::string("d"), - ]))) - ]) - ]))), + Variable::named( + "rule_0", + Rule::repeat(Rule::seq(vec![ + Rule::string("a"), + Rule::pattern("b"), + Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + Rule::token(Rule::repeat(Rule::choice(vec![ + Rule::string("c"), + Rule::string("d"), + ]))), + ]), + ])), + ), Variable::named("rule_1", Rule::pattern("e")), Variable::named("rule_2", Rule::pattern("b")), - Variable::named("rule_3", Rule::seq(vec![ - Rule::non_terminal(2), - Rule::Blank, - ])), - ])).unwrap(); + Variable::named( + "rule_3", + Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]), + ), + ])) + .unwrap(); - assert_eq!(syntax_grammar.variables, vec![ - Variable::named("rule_0", Rule::repeat(Rule::seq(vec![ - // The string "a" was replaced by a symbol referencing the lexical grammar - Rule::terminal(0), + assert_eq!( + syntax_grammar.variables, + vec![ + Variable::named( + "rule_0", + Rule::repeat(Rule::seq(vec![ + // The string "a" was replaced by a symbol referencing the lexical grammar + Rule::terminal(0), + // The pattern "b" was replaced by a symbol referencing the lexical grammar + Rule::terminal(1), + Rule::choice(vec![ + // The symbol referencing `rule_1` was replaced by a symbol referencing + // the lexical grammar. + Rule::terminal(3), + // The symbol referencing `rule_2` had its index decremented because + // `rule_1` was moved to the lexical grammar. + Rule::non_terminal(1), + // The rule wrapped in `token` was replaced by a symbol referencing + // the lexical grammar. 
+ Rule::terminal(2), + ]) + ])) + ), + // The pattern "e" was only used in once place: as the definition of `rule_1`, + // so that rule was moved to the lexical grammar. The pattern "b" appeared in + // two places, so it was not moved into the lexical grammar. + Variable::named("rule_2", Rule::terminal(1)), + Variable::named( + "rule_3", + Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,]) + ), + ] + ); - // The pattern "b" was replaced by a symbol referencing the lexical grammar - Rule::terminal(1), - Rule::choice(vec![ - // The symbol referencing `rule_1` was replaced by a symbol referencing - // the lexical grammar. - Rule::terminal(3), - - // The symbol referencing `rule_2` had its index decremented because - // `rule_1` was moved to the lexical grammar. - Rule::non_terminal(1), - - // The rule wrapped in `token` was replaced by a symbol referencing - // the lexical grammar. - Rule::terminal(2), - ]) - ]))), - - // The pattern "e" was only used in once place: as the definition of `rule_1`, - // so that rule was moved to the lexical grammar. The pattern "b" appeared in - // two places, so it was not moved into the lexical grammar. 
- Variable::named("rule_2", Rule::terminal(1)), - Variable::named("rule_3", Rule::seq(vec![ - Rule::non_terminal(1), - Rule::Blank, - ])), - ]); - - assert_eq!(lexical_grammar.variables, vec![ - Variable::anonymous("a", Rule::string("a")), - Variable::auxiliary("rule_0_token1", Rule::pattern("b")), - Variable::auxiliary("rule_0_token2", Rule::repeat(Rule::choice(vec![ - Rule::string("c"), - Rule::string("d"), - ]))), - Variable::named("rule_1", Rule::pattern("e")), - ]); + assert_eq!( + lexical_grammar.variables, + vec![ + Variable::anonymous("a", Rule::string("a")), + Variable::auxiliary("rule_0_token1", Rule::pattern("b")), + Variable::auxiliary( + "rule_0_token2", + Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),])) + ), + Variable::named("rule_1", Rule::pattern("e")), + ] + ); } #[test] fn test_start_rule_is_token() { - let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![ - Variable::named("rule_0", Rule::string("hello")), - ])).unwrap(); + let (syntax_grammar, lexical_grammar) = + extract_tokens(build_grammar(vec![Variable::named( + "rule_0", + Rule::string("hello"), + )])) + .unwrap(); - assert_eq!(syntax_grammar.variables, vec![ - Variable::named("rule_0", Rule::terminal(0)), - ]); - assert_eq!(lexical_grammar.variables, vec![ - Variable::anonymous("hello", Rule::string("hello")), - ]) + assert_eq!( + syntax_grammar.variables, + vec![Variable::named("rule_0", Rule::terminal(0)),] + ); + assert_eq!( + lexical_grammar.variables, + vec![Variable::anonymous("hello", Rule::string("hello")),] + ) } #[test] @@ -376,29 +405,25 @@ mod test { Variable::named("rule_0", Rule::string("x")), Variable::named("comment", Rule::pattern("//.*")), ]); - grammar.extra_tokens = vec![ - Rule::string(" "), - Rule::non_terminal(1), - ]; + grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)]; let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap(); - assert_eq!(syntax_grammar.extra_tokens, vec![ - 
Symbol::terminal(1), - ]); - assert_eq!(lexical_grammar.separators, vec![ - Rule::string(" "), - ]); + assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]); + assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]); } #[test] fn test_extract_externals() { let mut grammar = build_grammar(vec![ - Variable::named("rule_0", Rule::seq(vec![ - Rule::external(0), - Rule::string("a"), - Rule::non_terminal(1), - Rule::non_terminal(2), - ])), + Variable::named( + "rule_0", + Rule::seq(vec![ + Rule::external(0), + Rule::string("a"), + Rule::non_terminal(1), + Rule::non_terminal(2), + ]), + ), Variable::named("rule_1", Rule::string("b")), Variable::named("rule_2", Rule::string("c")), ]); @@ -410,23 +435,26 @@ mod test { let (syntax_grammar, _) = extract_tokens(grammar).unwrap(); - assert_eq!(syntax_grammar.external_tokens, vec![ - ExternalToken { - name: "external_0".to_string(), - kind: VariableType::Named, - corresponding_internal_token: None, - }, - ExternalToken { - name: "a".to_string(), - kind: VariableType::Anonymous, - corresponding_internal_token: Some(Symbol::terminal(0)), - }, - ExternalToken { - name: "rule_2".to_string(), - kind: VariableType::Named, - corresponding_internal_token: Some(Symbol::terminal(2)), - }, - ]); + assert_eq!( + syntax_grammar.external_tokens, + vec![ + ExternalToken { + name: "external_0".to_string(), + kind: VariableType::Named, + corresponding_internal_token: None, + }, + ExternalToken { + name: "a".to_string(), + kind: VariableType::Anonymous, + corresponding_internal_token: Some(Symbol::terminal(0)), + }, + ExternalToken { + name: "rule_2".to_string(), + kind: VariableType::Named, + corresponding_internal_token: Some(Symbol::terminal(2)), + }, + ] + ); } #[test] @@ -436,14 +464,15 @@ mod test { Variable::named("rule_1", Rule::non_terminal(2)), Variable::named("rule_2", Rule::string("x")), ]); - grammar.extra_tokens = vec![ - Rule::non_terminal(1), - ]; + grammar.extra_tokens = vec![Rule::non_terminal(1)]; match 
extract_tokens(grammar) { Err(Error::GrammarError(s)) => { - assert_eq!(s, "Non-token symbol 'rule_1' cannot be used as an extra token"); - }, + assert_eq!( + s, + "Non-token symbol 'rule_1' cannot be used as an extra token" + ); + } _ => { panic!("Expected an error but got no error"); } @@ -453,24 +482,22 @@ mod test { #[test] fn test_error_on_external_with_same_name_as_non_terminal() { let mut grammar = build_grammar(vec![ - Variable::named("rule_0", Rule::seq(vec![ - Rule::non_terminal(1), - Rule::non_terminal(2), - ])), - Variable::named("rule_1", Rule::seq(vec![ - Rule::non_terminal(2), - Rule::non_terminal(2), - ])), + Variable::named( + "rule_0", + Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), + ), + Variable::named( + "rule_1", + Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]), + ), Variable::named("rule_2", Rule::string("a")), ]); - grammar.external_tokens = vec![ - Variable::named("rule_1", Rule::non_terminal(1)), - ]; + grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))]; match extract_tokens(grammar) { Err(Error::GrammarError(s)) => { assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule"); - }, + } _ => { panic!("Expected an error but got no error"); } diff --git a/src/prepare_grammar/flatten_grammar.rs b/src/prepare_grammar/flatten_grammar.rs index 0f09cd14..3ffef086 100644 --- a/src/prepare_grammar/flatten_grammar.rs +++ b/src/prepare_grammar/flatten_grammar.rs @@ -1,7 +1,313 @@ -use crate::error::Result; -use crate::grammars::SyntaxGrammar; use super::ExtractedSyntaxGrammar; +use crate::error::Result; +use crate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable}; +use crate::rules::{Alias, Associativity, Rule}; + +struct RuleFlattener { + production: Production, + precedence_stack: Vec, + associativity_stack: Vec, + alias_stack: Vec, +} + +impl RuleFlattener { + fn new() -> Self { + Self { + production: Production { + 
steps: Vec::new(), + dynamic_precedence: 0, + }, + precedence_stack: Vec::new(), + associativity_stack: Vec::new(), + alias_stack: Vec::new(), + } + } + + fn flatten(mut self, rule: Rule) -> Production { + self.apply(rule, true); + self.production + } + + fn apply(&mut self, rule: Rule, at_end: bool) { + match rule { + Rule::Seq(members) => { + let last_index = members.len() - 1; + for (i, member) in members.into_iter().enumerate() { + self.apply(member, i == last_index && at_end); + } + } + Rule::Metadata { rule, params } => { + let mut has_precedence = false; + if let Some(precedence) = params.precedence { + has_precedence = true; + self.precedence_stack.push(precedence); + } + + let mut has_associativity = false; + if let Some(associativity) = params.associativity { + has_associativity = true; + self.associativity_stack.push(associativity); + } + + let mut has_alias = false; + if let Some(alias) = params.alias { + has_alias = true; + self.alias_stack.push(alias); + } + + if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() { + self.production.dynamic_precedence = params.dynamic_precedence; + } + + self.apply(*rule, at_end); + + if has_precedence { + self.precedence_stack.pop(); + if !at_end { + self.production.steps.last_mut().unwrap().precedence = + self.precedence_stack.last().cloned().unwrap_or(0); + } + } + + if has_associativity { + self.associativity_stack.pop(); + if !at_end { + self.production.steps.last_mut().unwrap().associativity = + self.associativity_stack.last().cloned(); + } + } + + if has_alias { + self.alias_stack.pop(); + } + } + Rule::Symbol(symbol) => { + self.production.steps.push(ProductionStep { + symbol, + precedence: self.precedence_stack.last().cloned().unwrap_or(0), + associativity: self.associativity_stack.last().cloned(), + alias: self.alias_stack.last().cloned(), + }); + } + _ => (), + } + } +} + +fn extract_choices(rule: Rule) -> Vec { + match rule { + Rule::Seq(elements) => { + let mut result = 
vec![Rule::Blank]; + for element in elements { + let extraction = extract_choices(element); + let mut next_result = Vec::new(); + for entry in result { + for extraction_entry in extraction.iter() { + next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()])); + } + } + result = next_result; + } + result + } + Rule::Choice(elements) => { + let mut result = Vec::new(); + for element in elements { + for rule in extract_choices(element) { + result.push(rule); + } + } + result + } + Rule::Metadata { rule, params } => extract_choices(*rule) + .into_iter() + .map(|rule| Rule::Metadata { + rule: Box::new(rule), + params: params.clone(), + }) + .collect(), + _ => vec![rule], + } +} + +fn flatten_variable(variable: Variable) -> Result { + let mut productions = Vec::new(); + for rule in extract_choices(variable.rule) { + let production = RuleFlattener::new().flatten(rule); + if !productions.contains(&production) { + productions.push(production); + } + } + Ok(SyntaxVariable { + name: variable.name, + kind: variable.kind, + productions, + }) +} pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result { - unimplemented!(); + let mut variables = Vec::new(); + for variable in grammar.variables { + variables.push(flatten_variable(variable)?); + } + Ok(SyntaxGrammar { + extra_tokens: grammar.extra_tokens, + expected_conflicts: grammar.expected_conflicts, + variables_to_inline: grammar.variables_to_inline, + external_tokens: grammar.external_tokens, + word_token: grammar.word_token, + variables, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::VariableType; + use crate::rules::Symbol; + + #[test] + fn test_flatten_grammar() { + let result = flatten_variable(Variable { + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::non_terminal(1), + Rule::prec_left( + 101, + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::choice(vec![ + Rule::prec_right( + 102, + Rule::seq(vec![Rule::non_terminal(3), 
Rule::non_terminal(4)]), + ), + Rule::non_terminal(5), + ]), + Rule::non_terminal(6), + ]), + ), + Rule::non_terminal(7), + ]), + }) + .unwrap(); + + assert_eq!( + result.productions, + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(2)) + .with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(3)) + .with_prec(102, Some(Associativity::Right)), + ProductionStep::new(Symbol::non_terminal(4)) + .with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(6)), + ProductionStep::new(Symbol::non_terminal(7)), + ] + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(2)) + .with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(5)) + .with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(6)), + ProductionStep::new(Symbol::non_terminal(7)), + ] + }, + ] + ); + } + + #[test] + fn test_flatten_grammar_with_maximum_dynamic_precedence() { + let result = flatten_variable(Variable { + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::non_terminal(1), + Rule::prec_dynamic(101, Rule::seq(vec![ + Rule::non_terminal(2), + Rule::choice(vec![ + Rule::prec_dynamic(102, Rule::seq(vec![ + Rule::non_terminal(3), + Rule::non_terminal(4) + ])), + Rule::non_terminal(5), + ]), + Rule::non_terminal(6), + ])), + Rule::non_terminal(7), + ]) + }).unwrap(); + + assert_eq!(result.productions, vec![ + Production { + dynamic_precedence: 102, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::non_terminal(3)), + ProductionStep::new(Symbol::non_terminal(4)), + ProductionStep::new(Symbol::non_terminal(6)), + ProductionStep::new(Symbol::non_terminal(7)), + ], + }, 
+ Production { + dynamic_precedence: 101, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::non_terminal(5)), + ProductionStep::new(Symbol::non_terminal(6)), + ProductionStep::new(Symbol::non_terminal(7)), + ], + }, + ]); + } + + #[test] + fn test_flatten_grammar_with_final_precedence() { + let result = flatten_variable(Variable { + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::prec_left(101, Rule::seq(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + ])), + }).unwrap(); + + assert_eq!(result.productions, vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(2)).with_prec(101, Some(Associativity::Left)), + ] + } + ]); + + let result = flatten_variable(Variable { + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::prec_left(101, Rule::seq(vec![ + Rule::non_terminal(1), + ])), + }).unwrap(); + + assert_eq!(result.productions, vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)), + ] + } + ]); + } } diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs index 17132262..5165875c 100644 --- a/src/prepare_grammar/intern_symbols.rs +++ b/src/prepare_grammar/intern_symbols.rs @@ -1,14 +1,15 @@ -use crate::error::{Error, Result}; -use crate::rules::{Rule, Symbol}; -use crate::grammars::{InputGrammar, Variable, VariableType}; -use std::rc::Rc; use super::InternedGrammar; +use crate::error::{Error, Result}; +use crate::grammars::{InputGrammar, Variable, VariableType}; +use crate::rules::{Rule, Symbol}; pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result { let interner = Interner { grammar }; if variable_type_for_name(&grammar.variables[0].name) == 
VariableType::Hidden { - return Err(Error::GrammarError("Grammar's start rule must be visible".to_string())); + return Err(Error::GrammarError( + "Grammar's start rule must be visible".to_string(), + )); } let mut variables = Vec::with_capacity(grammar.variables.len()); @@ -40,9 +41,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result for conflict in grammar.expected_conflicts.iter() { let mut interned_conflict = Vec::with_capacity(conflict.len()); for name in conflict { - interned_conflict.push(interner - .intern_name(&name) - .ok_or_else(|| symbol_error(name))? + interned_conflict.push( + interner + .intern_name(&name) + .ok_or_else(|| symbol_error(name))?, ); } expected_conflicts.push(interned_conflict); @@ -57,9 +59,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result let mut word_token = None; if let Some(name) = grammar.word_token.as_ref() { - word_token = Some(interner - .intern_name(&name) - .ok_or_else(|| symbol_error(&name))? + word_token = Some( + interner + .intern_name(&name) + .ok_or_else(|| symbol_error(&name))?, ); } @@ -74,7 +77,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result } struct Interner<'a> { - grammar: &'a InputGrammar + grammar: &'a InputGrammar, } impl<'a> Interner<'a> { @@ -86,22 +89,19 @@ impl<'a> Interner<'a> { result.push(self.intern_rule(element)?); } Ok(Rule::Choice(result)) - }, + } Rule::Seq(elements) => { let mut result = Vec::with_capacity(elements.len()); for element in elements { result.push(self.intern_rule(element)?); } Ok(Rule::Seq(result)) - }, - Rule::Repeat(content) => Ok(Rule::Repeat( - Box::new(self.intern_rule(content)?) 
- )), - Rule::Metadata { rule, params } => - Ok(Rule::Metadata { - rule: Box::new(self.intern_rule(rule)?), - params: params.clone() - }), + } + Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))), + Rule::Metadata { rule, params } => Ok(Rule::Metadata { + rule: Box::new(self.intern_rule(rule)?), + params: params.clone(), + }), Rule::NamedSymbol(name) => { if let Some(symbol) = self.intern_name(&name) { @@ -109,29 +109,28 @@ impl<'a> Interner<'a> { } else { Err(symbol_error(name)) } - }, - - _ => Ok(rule.clone()) + } + _ => Ok(rule.clone()), } } fn intern_name(&self, symbol: &str) -> Option { for (i, variable) in self.grammar.variables.iter().enumerate() { if variable.name == symbol { - return Some(Symbol::non_terminal(i)) + return Some(Symbol::non_terminal(i)); } } for (i, external_token) in self.grammar.external_tokens.iter().enumerate() { if let Rule::NamedSymbol(name) = external_token { if name == symbol { - return Some(Symbol::external(i)) + return Some(Symbol::external(i)); } } } - return None + return None; } } @@ -154,22 +153,23 @@ mod tests { #[test] fn test_basic_repeat_expansion() { let grammar = intern_symbols(&build_grammar(vec![ - Variable::named("x", Rule::choice(vec![ - Rule::named("y"), - Rule::named("_z"), - ])), + Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])), Variable::named("y", Rule::named("_z")), Variable::named("_z", Rule::string("a")), - ])).unwrap(); + ])) + .unwrap(); - assert_eq!(grammar.variables, vec![ - Variable::named("x", Rule::choice(vec![ - Rule::non_terminal(1), - Rule::non_terminal(2), - ])), - Variable::named("y", Rule::non_terminal(2)), - Variable::hidden("_z", Rule::string("a")), - ]); + assert_eq!( + grammar.variables, + vec![ + Variable::named( + "x", + Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]) + ), + Variable::named("y", Rule::non_terminal(2)), + Variable::hidden("_z", Rule::string("a")), + ] + ); } #[test] @@ -177,45 +177,50 @@ mod tests { 
// Variable `y` is both an internal and an external token. // Variable `z` is just an external token. let mut input_grammar = build_grammar(vec![ - Variable::named("w", Rule::choice(vec![ - Rule::named("x"), - Rule::named("y"), - Rule::named("z"), - ])), + Variable::named( + "w", + Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]), + ), Variable::named("x", Rule::string("a")), Variable::named("y", Rule::string("b")), ]); - input_grammar.external_tokens.extend(vec![ - Rule::named("y"), - Rule::named("z"), - ]); + input_grammar + .external_tokens + .extend(vec![Rule::named("y"), Rule::named("z")]); let grammar = intern_symbols(&input_grammar).unwrap(); // Variable `y` is referred to by its internal index. // Variable `z` is referred to by its external index. - assert_eq!(grammar.variables, vec![ - Variable::named("w", Rule::choice(vec![ - Rule::non_terminal(1), - Rule::non_terminal(2), - Rule::external(1), - ])), - Variable::named("x", Rule::string("a")), - Variable::named("y", Rule::string("b")), - ]); + assert_eq!( + grammar.variables, + vec![ + Variable::named( + "w", + Rule::choice(vec![ + Rule::non_terminal(1), + Rule::non_terminal(2), + Rule::external(1), + ]) + ), + Variable::named("x", Rule::string("a")), + Variable::named("y", Rule::string("b")), + ] + ); // The external token for `y` refers back to its internal index. 
- assert_eq!(grammar.external_tokens, vec![ - Variable::named("y", Rule::non_terminal(2)), - Variable::named("z", Rule::external(1)), - ]); + assert_eq!( + grammar.external_tokens, + vec![ + Variable::named("y", Rule::non_terminal(2)), + Variable::named("z", Rule::external(1)), + ] + ); } #[test] fn test_grammar_with_undefined_symbols() { - let result = intern_symbols(&build_grammar(vec![ - Variable::named("x", Rule::named("y")), - ])); + let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))])); match result { Err(Error::SymbolError(message)) => assert_eq!(message, "Undefined symbol 'y'"), diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index e2615479..08233c53 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -1,19 +1,19 @@ -mod intern_symbols; -mod extract_tokens; mod expand_repeats; -mod flatten_grammar; mod expand_tokens; mod extract_simple_aliases; +mod extract_tokens; +mod flatten_grammar; +mod intern_symbols; -use crate::rules::{AliasMap, Rule, Symbol}; -use crate::grammars::{InputGrammar, SyntaxGrammar, LexicalGrammar, Variable, ExternalToken}; -use crate::error::Result; -use self::intern_symbols::intern_symbols; -use self::extract_tokens::extract_tokens; use self::expand_repeats::expand_repeats; -use self::flatten_grammar::flatten_grammar; use self::expand_tokens::expand_tokens; use self::extract_simple_aliases::extract_simple_aliases; +use self::extract_tokens::extract_tokens; +use self::flatten_grammar::flatten_grammar; +use self::intern_symbols::intern_symbols; +use crate::error::Result; +use crate::grammars::{ExternalToken, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable}; +use crate::rules::{AliasMap, Rule, Symbol}; pub(self) struct IntermediateGrammar { variables: Vec, @@ -35,7 +35,7 @@ pub(self) struct ExtractedLexicalGrammar { } pub(crate) fn prepare_grammar( - input_grammar: &InputGrammar + input_grammar: &InputGrammar, ) -> Result<(SyntaxGrammar, LexicalGrammar, 
AliasMap)> { let interned_grammar = intern_symbols(input_grammar)?; let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; diff --git a/src/rules.rs b/src/rules.rs index c6f18cf4..5d0af86c 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -1,5 +1,3 @@ -use std::rc::Rc; -use std::char; use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -92,6 +90,12 @@ impl Rule { }) } + pub fn prec_dynamic(value: i32, content: Rule) -> Self { + add_metadata(content, |params| { + params.dynamic_precedence = value; + }) + } + pub fn repeat(rule: Rule) -> Self { Rule::Repeat(Box::new(rule)) } From 85347541f155736e423203944903033c76993187 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 17:30:12 -0800 Subject: [PATCH 056/208] Allow PropertySheet selectors to match the root node Co-Authored-By: Timothy Clem --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d70dc607..ad31d3c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -550,13 +550,16 @@ impl<'a> Drop for TreeCursor<'a> { impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

, source: &'a str) -> Self { - Self { + let mut result = Self { cursor: tree.root_node().walk(), child_index_stack: vec![0], state_stack: vec![0], property_sheet, source, - } + }; + let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0); + result.state_stack.push(state); + result } pub fn node(&self) -> Node<'a> { From 40d24097ecdcc188f255a9fbb03adca05c5f39fd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Dec 2018 12:37:09 -0800 Subject: [PATCH 057/208] Implement extract_simple_aliases --- src/grammars.rs | 9 + src/prepare_grammar/extract_simple_aliases.rs | 191 +++++++++++++++++- src/prepare_grammar/mod.rs | 4 +- 3 files changed, 199 insertions(+), 5 deletions(-) diff --git a/src/grammars.rs b/src/grammars.rs index 3b3d47f7..b76a583e 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -97,6 +97,15 @@ impl ProductionStep { alias: self.alias, } } + + pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self { + Self { + symbol: self.symbol, + precedence: self.precedence, + associativity: self.associativity, + alias: Some(Alias { value: value.to_string(), is_named }), + } + } } impl Variable { diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs index 2a175242..a10c7982 100644 --- a/src/prepare_grammar/extract_simple_aliases.rs +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -1,9 +1,194 @@ -use crate::rules::AliasMap; +use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +#[derive(Clone, Default)] +struct SymbolStatus { + alias: Option, + conflicting: bool, +} + pub(super) fn extract_simple_aliases( syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: &mut LexicalGrammar + lexical_grammar: &LexicalGrammar ) -> AliasMap { - unimplemented!(); + // Determine which symbols in the grammars are *always* aliased to a single name. 
+ let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol { + Symbol { kind: SymbolType::External, index} => &mut external_status_list[index], + Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index], + Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index], + }; + + if step.alias.is_none() { + status.alias = None; + status.conflicting = true; + } + + if !status.conflicting { + if status.alias.is_none() { + status.alias = step.alias.clone(); + } else if status.alias != step.alias { + status.alias = None; + status.conflicting = true; + } + } + } + } + } + + // Remove the aliases for those symbols. + for variable in syntax_grammar.variables.iter_mut() { + for production in variable.productions.iter_mut() { + for step in production.steps.iter_mut() { + let status = match step.symbol { + Symbol { kind: SymbolType::External, index} => &external_status_list[index], + Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index], + Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index], + }; + + if status.alias.is_some() { + step.alias = None; + } + } + } + } + + // Populate a map of the symbols to their aliases. 
+ let mut result = AliasMap::new(); + for (i, status) in terminal_status_list.into_iter().enumerate() { + if let Some(alias) = status.alias { + result.insert(Symbol::terminal(i), alias); + } + } + for (i, status) in non_terminal_status_list.into_iter().enumerate() { + if let Some(alias) = status.alias { + result.insert(Symbol::non_terminal(i), alias); + } + } + for (i, status) in external_status_list.into_iter().enumerate() { + if let Some(alias) = status.alias { + result.insert(Symbol::external(i), alias); + } + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep}; + use crate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ], + }, + ], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + + // Token 1 is aliased above, but not here. + ProductionStep::new(Symbol::terminal(1)), + + // Token 2 is aliased differently than above. 
+ ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), + ], + }, + ], + }, + ], + extra_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + variables: vec![ + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + nfa: Nfa::new(), + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + nfa: Nfa::new(), + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + nfa: Nfa::new(), + } + ], + separators: Vec::new(), + }; + + let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + assert_eq!(simple_aliases.len(), 1); + assert_eq!(simple_aliases[&Symbol::terminal(0)], Alias { + value: "a1".to_string(), + is_named: true, + }); + + assert_eq!(syntax_grammar.variables, vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + // 'Simple' alias removed + ProductionStep::new(Symbol::terminal(0)), + + // Other aliases unchanged + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ], + }, + ], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), + ], + }, + ], + }, + ]); + } } diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index 08233c53..22435fca 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -41,7 +41,7 @@ pub(crate) fn prepare_grammar( let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = 
expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; - let mut lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &mut lexical_grammar); + let lexical_grammar = expand_tokens(lexical_grammar)?; + let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); Ok((syntax_grammar, lexical_grammar, simple_aliases)) } From 0103a83f3f88cb8745706517a96f32c01ef1286a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Dec 2018 18:04:29 -0800 Subject: [PATCH 058/208] Integrate separator rules into lexer nfa --- src/grammars.rs | 4 +- src/nfa.rs | 44 +-- src/prepare_grammar/expand_tokens.rs | 265 +++++++++++------- src/prepare_grammar/extract_simple_aliases.rs | 8 +- src/rules.rs | 1 - 5 files changed, 199 insertions(+), 123 deletions(-) diff --git a/src/grammars.rs b/src/grammars.rs index b76a583e..74c213e1 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -35,13 +35,13 @@ pub(crate) struct InputGrammar { pub(crate) struct LexicalVariable { pub name: String, pub kind: VariableType, - pub nfa: Nfa, + pub start_state: u32, } #[derive(Debug, PartialEq, Eq)] pub(crate) struct LexicalGrammar { + pub nfa: Nfa, pub variables: Vec, - pub separators: Vec, } // Extracted syntax grammar diff --git a/src/nfa.rs b/src/nfa.rs index 22cb2a2e..66861434 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -9,9 +9,13 @@ pub enum CharacterSet { #[derive(Debug, PartialEq, Eq)] pub enum NfaState { - Advance(CharacterSet, u32), + Advance { + chars: CharacterSet, + state: u32, + is_sep: bool, + }, Split(u32, u32), - Accept, + Accept(usize), } #[derive(PartialEq, Eq)] @@ -23,6 +27,7 @@ pub struct Nfa { pub struct NfaCursor<'a> { indices: Vec, nfa: &'a Nfa, + in_sep: bool, } impl CharacterSet { @@ -88,15 +93,15 @@ impl CharacterSet { impl Nfa { pub fn new() -> Self { - Nfa { states: vec![NfaState::Accept] } + Nfa { states: Vec::new() } } - pub fn start_index(&self) -> u32 { 
+ pub fn last_state(&self) -> u32 { self.states.len() as u32 - 1 } pub fn prepend(&mut self, f: impl Fn(u32) -> NfaState) { - self.states.push(f(self.start_index())); + self.states.push(f(self.last_state())); } } @@ -116,38 +121,45 @@ impl fmt::Debug for Nfa { impl<'a> NfaCursor<'a> { pub fn new(nfa: &'a Nfa) -> Self { - let mut result = Self { nfa, indices: Vec::new() }; - result.add_indices(&mut vec![nfa.start_index()]); + let mut result = Self { nfa, indices: Vec::new(), in_sep: true }; + result.add_states(&mut vec![nfa.last_state()]); result } pub fn advance(&mut self, c: char) -> bool { let mut result = false; let mut new_indices = Vec::new(); + let mut any_sep_transitions = false; for index in &self.indices { - if let NfaState::Advance(chars, next_index) = &self.nfa.states[*index as usize] { + if let NfaState::Advance { chars, state, is_sep } = &self.nfa.states[*index as usize] { + if *is_sep { + any_sep_transitions = true; + } if chars.contains(c) { - new_indices.push(*next_index); + new_indices.push(*state); result = true; } } } + if !any_sep_transitions { + self.in_sep = false; + } self.indices.clear(); - self.add_indices(&mut new_indices); + self.add_states(&mut new_indices); result } - pub fn is_done(&self) -> bool { - self.indices.iter().any(|index| { - if let NfaState::Accept = self.nfa.states[*index as usize] { - true + pub fn finished_ids<'b>(&'b self) -> impl Iterator + 'b { + self.indices.iter().filter_map(move |index| { + if let NfaState::Accept(i) = self.nfa.states[*index as usize] { + Some(i) } else { - false + None } }) } - pub fn add_indices(&mut self, new_indices: &mut Vec) { + pub fn add_states(&mut self, new_indices: &mut Vec) { while let Some(index) = new_indices.pop() { let state = &self.nfa.states[index as usize]; if let NfaState::Split(left, right) = state { diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index e0e1f9a9..3019b2be 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ 
b/src/prepare_grammar/expand_tokens.rs @@ -39,40 +39,46 @@ fn expand_character_class(item: &ClassSetItem) -> Result { } } -fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { +fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: bool) -> Result { match ast { - Ast::Empty(_) => Ok(()), + Ast::Empty(_) => Ok(false), Ast::Flags(_) => Err(Error::regex("Flags are not supported")), Ast::Literal(literal) => { - nfa.states.push(NfaState::Advance( - CharacterSet::Include(vec![literal.c]), - next_state_index, - )); - Ok(()) + nfa.states.push(NfaState::Advance { + chars: CharacterSet::Include(vec![literal.c]), + state: next_state_index, + is_sep, + }); + Ok(true) } Ast::Dot(_) => { - nfa.states.push(NfaState::Advance( - CharacterSet::Exclude(vec!['\n']), - next_state_index, - )); - Ok(()) + nfa.states.push(NfaState::Advance { + chars: CharacterSet::Exclude(vec!['\n']), + state: next_state_index, + is_sep, + }); + Ok(true) } Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")), Ast::Class(class) => match class { Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")), Class::Perl(class) => { - nfa.states.push(NfaState::Advance( - expand_perl_character_class(&class.kind), - next_state_index, - )); - Ok(()) + nfa.states.push(NfaState::Advance { + chars: expand_perl_character_class(&class.kind), + state: next_state_index, + is_sep, + }); + Ok(true) } Class::Bracketed(class) => match &class.kind { ClassSet::Item(item) => { let character_set = expand_character_class(&item)?; - nfa.states - .push(NfaState::Advance(character_set, next_state_index)); - Ok(()) + nfa.states.push(NfaState::Advance { + chars: character_set, + state: next_state_index, + is_sep, + }); + Ok(true) } ClassSet::BinaryOp(_) => Err(Error::regex( "Binary operators in character classes aren't supported", @@ -81,134 +87,171 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32) -> Result<( }, 
Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { - expand_regex(&repetition.ast, nfa, next_state_index)?; - nfa.prepend(|start_index| NfaState::Split(next_state_index, start_index)); - Ok(()) + if expand_regex(&repetition.ast, nfa, next_state_index, is_sep)? { + nfa.prepend(|last_state| NfaState::Split(next_state_index, last_state)); + Ok(true) + } else { + Ok(false) + } } RepetitionKind::OneOrMore => { - nfa.states.push(NfaState::Accept); // Placeholder for split - let split_index = nfa.start_index(); - expand_regex(&repetition.ast, nfa, split_index)?; - nfa.states[split_index as usize] = - NfaState::Split(nfa.start_index(), next_state_index); - Ok(()) + nfa.states.push(NfaState::Accept(0)); // Placeholder for split + let split_index = nfa.last_state(); + if expand_regex(&repetition.ast, nfa, split_index, is_sep)? { + nfa.states[split_index as usize] = + NfaState::Split(nfa.last_state(), next_state_index); + Ok(true) + } else { + nfa.states.pop(); + Ok(false) + } } RepetitionKind::ZeroOrMore => { - nfa.states.push(NfaState::Accept); // Placeholder for split - let split_index = nfa.start_index(); - expand_regex(&repetition.ast, nfa, split_index)?; - nfa.states[split_index as usize] = - NfaState::Split(nfa.start_index(), next_state_index); - nfa.prepend(|start_index| NfaState::Split(start_index, next_state_index)); - Ok(()) + nfa.states.push(NfaState::Accept(0)); // Placeholder for split + let split_index = nfa.last_state(); + if expand_regex(&repetition.ast, nfa, split_index, is_sep)? 
{ + nfa.states[split_index as usize] = + NfaState::Split(nfa.last_state(), next_state_index); + nfa.prepend(|last_state| NfaState::Split(last_state, next_state_index)); + Ok(true) + } else { + Ok(false) + } } RepetitionKind::Range(_) => unimplemented!(), }, - Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.start_index()), + Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.last_state(), is_sep), Ast::Alternation(alternation) => { let mut alternative_start_indices = Vec::new(); for ast in alternation.asts.iter() { - expand_regex(&ast, nfa, next_state_index)?; - alternative_start_indices.push(nfa.start_index()); + if expand_regex(&ast, nfa, next_state_index, is_sep)? { + alternative_start_indices.push(nfa.last_state()); + } } alternative_start_indices.pop(); for alternative_start_index in alternative_start_indices { - nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index)); + nfa.prepend(|last_state| NfaState::Split(last_state, alternative_start_index)); } - Ok(()) + Ok(true) } Ast::Concat(concat) => { + let mut result = false; for ast in concat.asts.iter().rev() { - expand_regex(&ast, nfa, next_state_index)?; - next_state_index = nfa.start_index(); + if expand_regex(&ast, nfa, next_state_index, is_sep)? 
{ + result = true; + } + next_state_index = nfa.last_state(); } - Ok(()) + Ok(result) } } } -fn expand_rule(rule: Rule, nfa: &mut Nfa, mut next_state_index: u32) -> Result<()> { +fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_index: u32, is_sep: bool) -> Result { match rule { Rule::Pattern(s) => { let ast = parse::Parser::new() .parse(&s) .map_err(|e| Error::GrammarError(e.to_string()))?; - expand_regex(&ast, nfa, next_state_index)?; - Ok(()) + expand_regex(&ast, nfa, next_state_index, is_sep) } Rule::String(s) => { for c in s.chars().rev() { - nfa.prepend(|start_index| { - NfaState::Advance(CharacterSet::empty().add_char(c), start_index) + nfa.prepend(|last_state| { + NfaState::Advance { + chars: CharacterSet::empty().add_char(c), + state: last_state, + is_sep, + } }); } - Ok(()) + Ok(s.len() > 0) } Rule::Choice(elements) => { let mut alternative_start_indices = Vec::new(); for element in elements { - expand_rule(element, nfa, next_state_index)?; - alternative_start_indices.push(nfa.start_index()); + if expand_rule(element, nfa, next_state_index, is_sep)? { + alternative_start_indices.push(nfa.last_state()); + } } alternative_start_indices.pop(); for alternative_start_index in alternative_start_indices { - nfa.prepend(|start_index| NfaState::Split(start_index, alternative_start_index)); + nfa.prepend(|last_state| NfaState::Split(last_state, alternative_start_index)); } - Ok(()) + Ok(true) } Rule::Seq(elements) => { + let mut result = false; for element in elements.into_iter().rev() { - expand_rule(element, nfa, next_state_index)?; - next_state_index = nfa.start_index(); + if expand_rule(element, nfa, next_state_index, is_sep)? 
{ + result = true; + } + next_state_index = nfa.last_state(); } - Ok(()) + Ok(result) } Rule::Repeat(rule) => { - nfa.states.push(NfaState::Accept); // Placeholder for split - let split_index = nfa.start_index(); - expand_rule(*rule, nfa, split_index)?; - nfa.states[split_index as usize] = NfaState::Split(nfa.start_index(), next_state_index); - Ok(()) + nfa.states.push(NfaState::Accept(0)); // Placeholder for split + let split_index = nfa.last_state(); + if expand_rule(rule, nfa, split_index, is_sep)? { + nfa.states[split_index as usize] = NfaState::Split(nfa.last_state(), next_state_index); + Ok(true) + } else { + Ok(false) + } } - _ => Err(Error::grammar("Unexpected rule type")), + Rule::Blank => Ok(false), + _ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))), } } pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result { + let mut nfa = Nfa::new(); + + let separator_rule = if grammar.separators.len() > 0 { + Rule::repeat(Rule::choice(grammar.separators)) + } else { + Rule::Blank + }; + let mut variables = Vec::new(); - for variable in grammar.variables { - let mut nfa = Nfa::new(); - expand_rule(variable.rule, &mut nfa, 0)?; + for (i, variable) in grammar.variables.into_iter().enumerate() { + let is_immediate_token = match &variable.rule { + Rule::Metadata { params, .. 
} => params.is_main_token, + _ => false, + }; + + nfa.states.push(NfaState::Accept(i)); + let last_state = nfa.last_state(); + expand_rule(&variable.rule, &mut nfa, last_state, false)?; + + if !is_immediate_token { + let last_state = nfa.last_state(); + expand_rule(&separator_rule, &mut nfa, last_state, true)?; + } + variables.push(LexicalVariable { name: variable.name, kind: variable.kind, - nfa, + start_state: nfa.last_state(), }); } - let mut separators = Vec::new(); - for separator in grammar.separators { - let mut nfa = Nfa::new(); - expand_rule(separator, &mut nfa, 0)?; - separators.push(nfa); - } - Ok(LexicalGrammar { - variables, - separators, - }) + Ok(LexicalGrammar { nfa, variables }) } #[cfg(test)] mod tests { use super::*; use crate::nfa::NfaCursor; + use crate::grammars::Variable; fn simulate_nfa<'a>(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> { let mut result = None; let mut char_count = 0; let mut cursor = NfaCursor::new(nfa); for c in s.chars() { - if cursor.is_done() { + if cursor.finished_ids().count() > 0 { result = Some(&s[0..char_count]); } if cursor.advance(c) { @@ -223,13 +266,13 @@ mod tests { #[test] fn test_rule_expansion() { struct Row { - rule: Rule, + rules: Vec, examples: Vec<(&'static str, Option<&'static str>)>, } let table = [ Row { - rule: Rule::pattern("a|bc"), + rules: vec![Rule::pattern("a|bc")], examples: vec![ ("a12", Some("a")), ("bc12", Some("bc")), @@ -238,7 +281,7 @@ mod tests { ], }, Row { - rule: Rule::pattern("(a|b|c)d(e|f|g)h?"), + rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")], examples: vec![ ("ade1", Some("ade")), ("bdf1", Some("bdf")), @@ -247,11 +290,14 @@ mod tests { ], }, Row { - rule: Rule::pattern("a*"), - examples: vec![("aaa1", Some("aaa")), ("b", Some(""))], + rules: vec![Rule::pattern("a*")], + examples: vec![ + ("aaa1", Some("aaa")), + ("b", Some("")), + ], }, Row { - rule: Rule::pattern("a((bc)+|(de)*)f"), + rules: vec![Rule::pattern("a((bc)+|(de)*)f")], examples: vec![ ("af1", Some("af")), 
("adedef1", Some("adedef")), @@ -260,32 +306,51 @@ mod tests { ], }, Row { - rule: Rule::pattern("[a-fA-F0-9]+"), - examples: vec![("A1ff0", Some("A1ff"))], + rules: vec![Rule::pattern("[a-fA-F0-9]+")], + examples: vec![ + ("A1ff0", Some("A1ff")), + ], }, Row { - rule: Rule::pattern("\\w\\d\\s"), - examples: vec![("_0 ", Some("_0 "))], + rules: vec![Rule::pattern("\\w\\d\\s")], + examples: vec![ + ("_0 ", Some("_0 ")), + ], }, Row { - rule: Rule::string("abc"), - examples: vec![("abcd", Some("abc")), ("ab", None)], + rules: vec![Rule::string("abc")], + examples: vec![ + ("abcd", Some("abc")), + ("ab", None) + ], }, Row { - rule: Rule::repeat(Rule::seq(vec![ - Rule::string("{"), - Rule::pattern("[a-f]+"), - Rule::string("}"), - ])), - examples: vec![("{a}{", Some("{a}")), ("{a}{d", Some("{a}")), ("ab", None)], + rules: vec![ + Rule::repeat(Rule::seq(vec![ + Rule::string("{"), + Rule::pattern("[a-f]+"), + Rule::string("}"), + ])), + ], + examples: vec![ + ("{a}{", Some("{a}")), + ("{a}{d", Some("{a}")), + ("ab", None), + ], }, ]; - for Row { rule, examples } in table.iter() { - let mut nfa = Nfa::new(); - expand_rule(rule.clone(), &mut nfa, 0).unwrap(); + for Row { rules, examples } in &table { + let grammar = expand_tokens(ExtractedLexicalGrammar { + separators: vec![], + variables: rules + .into_iter() + .map(|rule| Variable::named("", rule.clone())) + .collect(), + }).unwrap(); + for (haystack, needle) in examples.iter() { - assert_eq!(simulate_nfa(&nfa, haystack), *needle); + assert_eq!(simulate_nfa(&grammar.nfa, haystack), *needle); } } } diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs index a10c7982..8b87ea2e 100644 --- a/src/prepare_grammar/extract_simple_aliases.rs +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -130,24 +130,24 @@ mod tests { }; let lexical_grammar = LexicalGrammar { + nfa: Nfa::new(), variables: vec![ LexicalVariable { name: "t1".to_string(), kind: VariableType::Anonymous, - 
nfa: Nfa::new(), + start_state: 0, }, LexicalVariable { name: "t2".to_string(), kind: VariableType::Anonymous, - nfa: Nfa::new(), + start_state: 0, }, LexicalVariable { name: "t3".to_string(), kind: VariableType::Anonymous, - nfa: Nfa::new(), + start_state: 0, } ], - separators: Vec::new(), }; let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); diff --git a/src/rules.rs b/src/rules.rs index 5d0af86c..d7234f45 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -30,7 +30,6 @@ pub(crate) struct MetadataParams { pub is_string: bool, pub is_active: bool, pub is_main_token: bool, - pub is_excluded: bool, pub alias: Option, } From 842421633c1161351ec0ba764be8927d09b15728 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Dec 2018 20:58:26 -0800 Subject: [PATCH 059/208] Fix bugs in nfa generation --- src/nfa.rs | 95 +++++++---- src/prepare_grammar/expand_tokens.rs | 230 ++++++++++++++++++--------- 2 files changed, 212 insertions(+), 113 deletions(-) diff --git a/src/nfa.rs b/src/nfa.rs index 66861434..bc084ede 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -11,7 +11,7 @@ pub enum CharacterSet { pub enum NfaState { Advance { chars: CharacterSet, - state: u32, + state_id: u32, is_sep: bool, }, Split(u32, u32), @@ -25,7 +25,7 @@ pub struct Nfa { #[derive(Debug)] pub struct NfaCursor<'a> { - indices: Vec, + pub(crate) state_ids: Vec, nfa: &'a Nfa, in_sep: bool, } @@ -96,23 +96,20 @@ impl Nfa { Nfa { states: Vec::new() } } - pub fn last_state(&self) -> u32 { + pub fn last_state_id(&self) -> u32 { self.states.len() as u32 - 1 } pub fn prepend(&mut self, f: impl Fn(u32) -> NfaState) { - self.states.push(f(self.last_state())); + self.states.push(f(self.last_state_id())); } } impl fmt::Debug for Nfa { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Nfa {{ states: {{")?; + write!(f, "Nfa {{ states: {{\n")?; for (i, state) in self.states.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {:?}", i, state)?; 
+ write!(f, " {}: {:?},\n", i, state)?; } write!(f, "}} }}")?; Ok(()) @@ -120,23 +117,23 @@ impl fmt::Debug for Nfa { } impl<'a> NfaCursor<'a> { - pub fn new(nfa: &'a Nfa) -> Self { - let mut result = Self { nfa, indices: Vec::new(), in_sep: true }; - result.add_states(&mut vec![nfa.last_state()]); + pub fn new(nfa: &'a Nfa, mut states: Vec) -> Self { + let mut result = Self { nfa, state_ids: Vec::new(), in_sep: true }; + result.add_states(&mut states); result } pub fn advance(&mut self, c: char) -> bool { let mut result = false; - let mut new_indices = Vec::new(); + let mut new_state_ids = Vec::new(); let mut any_sep_transitions = false; - for index in &self.indices { - if let NfaState::Advance { chars, state, is_sep } = &self.nfa.states[*index as usize] { - if *is_sep { - any_sep_transitions = true; - } + for current_state_id in &self.state_ids { + if let NfaState::Advance { chars, state_id, is_sep } = &self.nfa.states[*current_state_id as usize] { if chars.contains(c) { - new_indices.push(*state); + if *is_sep { + any_sep_transitions = true; + } + new_state_ids.push(*state_id); result = true; } } @@ -144,30 +141,58 @@ impl<'a> NfaCursor<'a> { if !any_sep_transitions { self.in_sep = false; } - self.indices.clear(); - self.add_states(&mut new_indices); + self.state_ids.clear(); + self.add_states(&mut new_state_ids); result } - pub fn finished_ids<'b>(&'b self) -> impl Iterator + 'b { - self.indices.iter().filter_map(move |index| { - if let NfaState::Accept(i) = self.nfa.states[*index as usize] { - Some(i) - } else { - None + pub fn finished_id(&self) -> Option { + let mut result = None; + for state_id in self.state_ids.iter() { + if let NfaState::Accept(id) = self.nfa.states[*state_id as usize] { + match result { + None => { + result = Some(id) + }, + Some(existing_id) => if id < existing_id { + result = Some(id) + } + } } - }) + } + result } - pub fn add_states(&mut self, new_indices: &mut Vec) { - while let Some(index) = new_indices.pop() { - let state = 
&self.nfa.states[index as usize]; + pub fn in_separator(&self) -> bool { + self.in_sep + } + + pub fn add_states(&mut self, new_state_ids: &mut Vec) { + let mut i = 0; + while i < new_state_ids.len() { + let state_id = new_state_ids[i]; + let state = &self.nfa.states[state_id as usize]; if let NfaState::Split(left, right) = state { - new_indices.push(*left); - new_indices.push(*right); - } else if let Err(i) = self.indices.binary_search(&index) { - self.indices.insert(i, index); + let mut has_left = false; + let mut has_right = false; + for new_state_id in new_state_ids.iter() { + if *new_state_id == *left { + has_left = true; + } + if *new_state_id == *right { + has_right = true; + } + } + if !has_left { + new_state_ids.push(*left); + } + if !has_right { + new_state_ids.push(*right); + } + } else if let Err(i) = self.state_ids.binary_search(&state_id) { + self.state_ids.insert(i, state_id); } + i += 1; } } } diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 3019b2be..8b8cd03a 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -39,14 +39,14 @@ fn expand_character_class(item: &ClassSetItem) -> Result { } } -fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: bool) -> Result { +fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) -> Result { match ast { Ast::Empty(_) => Ok(false), Ast::Flags(_) => Err(Error::regex("Flags are not supported")), Ast::Literal(literal) => { nfa.states.push(NfaState::Advance { chars: CharacterSet::Include(vec![literal.c]), - state: next_state_index, + state_id: next_state_id, is_sep, }); Ok(true) @@ -54,7 +54,7 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo Ast::Dot(_) => { nfa.states.push(NfaState::Advance { chars: CharacterSet::Exclude(vec!['\n']), - state: next_state_index, + state_id: next_state_id, is_sep, }); Ok(true) @@ -65,7 +65,7 @@ fn expand_regex(ast: 
&Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo Class::Perl(class) => { nfa.states.push(NfaState::Advance { chars: expand_perl_character_class(&class.kind), - state: next_state_index, + state_id: next_state_id, is_sep, }); Ok(true) @@ -75,7 +75,7 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo let character_set = expand_character_class(&item)?; nfa.states.push(NfaState::Advance { chars: character_set, - state: next_state_index, + state_id: next_state_id, is_sep, }); Ok(true) @@ -87,8 +87,8 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo }, Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { - if expand_regex(&repetition.ast, nfa, next_state_index, is_sep)? { - nfa.prepend(|last_state| NfaState::Split(next_state_index, last_state)); + if expand_regex(&repetition.ast, nfa, next_state_id, is_sep)? { + nfa.prepend(|last_state_id| NfaState::Split(next_state_id, last_state_id)); Ok(true) } else { Ok(false) @@ -96,10 +96,10 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo } RepetitionKind::OneOrMore => { nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_index = nfa.last_state(); - if expand_regex(&repetition.ast, nfa, split_index, is_sep)? { - nfa.states[split_index as usize] = - NfaState::Split(nfa.last_state(), next_state_index); + let split_state_id = nfa.last_state_id(); + if expand_regex(&repetition.ast, nfa, split_state_id, is_sep)? { + nfa.states[split_state_id as usize] = + NfaState::Split(nfa.last_state_id(), next_state_id); Ok(true) } else { nfa.states.pop(); @@ -108,11 +108,11 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo } RepetitionKind::ZeroOrMore => { nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_index = nfa.last_state(); - if expand_regex(&repetition.ast, nfa, split_index, is_sep)? 
{ - nfa.states[split_index as usize] = - NfaState::Split(nfa.last_state(), next_state_index); - nfa.prepend(|last_state| NfaState::Split(last_state, next_state_index)); + let split_state_id = nfa.last_state_id(); + if expand_regex(&repetition.ast, nfa, split_state_id, is_sep)? { + nfa.states[split_state_id as usize] = + NfaState::Split(nfa.last_state_id(), next_state_id); + nfa.prepend(|last_state_id| NfaState::Split(last_state_id, next_state_id)); Ok(true) } else { Ok(false) @@ -120,47 +120,49 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_index: u32, is_sep: boo } RepetitionKind::Range(_) => unimplemented!(), }, - Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.last_state(), is_sep), + Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.last_state_id(), is_sep), Ast::Alternation(alternation) => { - let mut alternative_start_indices = Vec::new(); + let mut alternative_state_ids = Vec::new(); for ast in alternation.asts.iter() { - if expand_regex(&ast, nfa, next_state_index, is_sep)? { - alternative_start_indices.push(nfa.last_state()); + if expand_regex(&ast, nfa, next_state_id, is_sep)? { + alternative_state_ids.push(nfa.last_state_id()); + } else { + alternative_state_ids.push(next_state_id); } } - alternative_start_indices.pop(); - for alternative_start_index in alternative_start_indices { - nfa.prepend(|last_state| NfaState::Split(last_state, alternative_start_index)); + alternative_state_ids.retain(|i| *i != nfa.last_state_id()); + for alternative_state_id in alternative_state_ids { + nfa.prepend(|last_state_id| NfaState::Split(last_state_id, alternative_state_id)); } Ok(true) } Ast::Concat(concat) => { let mut result = false; for ast in concat.asts.iter().rev() { - if expand_regex(&ast, nfa, next_state_index, is_sep)? { + if expand_regex(&ast, nfa, next_state_id, is_sep)? 
{ result = true; } - next_state_index = nfa.last_state(); + next_state_id = nfa.last_state_id(); } Ok(result) } } } -fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_index: u32, is_sep: bool) -> Result { +fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) -> Result { match rule { Rule::Pattern(s) => { let ast = parse::Parser::new() .parse(&s) .map_err(|e| Error::GrammarError(e.to_string()))?; - expand_regex(&ast, nfa, next_state_index, is_sep) + expand_regex(&ast, nfa, next_state_id, is_sep) } Rule::String(s) => { for c in s.chars().rev() { - nfa.prepend(|last_state| { + nfa.prepend(|last_state_id| { NfaState::Advance { chars: CharacterSet::empty().add_char(c), - state: last_state, + state_id: last_state_id, is_sep, } }); @@ -168,33 +170,35 @@ fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_index: u32, is_sep: bo Ok(s.len() > 0) } Rule::Choice(elements) => { - let mut alternative_start_indices = Vec::new(); + let mut alternative_state_ids = Vec::new(); for element in elements { - if expand_rule(element, nfa, next_state_index, is_sep)? { - alternative_start_indices.push(nfa.last_state()); + if expand_rule(element, nfa, next_state_id, is_sep)? { + alternative_state_ids.push(nfa.last_state_id()); + } else { + alternative_state_ids.push(next_state_id); } } - alternative_start_indices.pop(); - for alternative_start_index in alternative_start_indices { - nfa.prepend(|last_state| NfaState::Split(last_state, alternative_start_index)); + alternative_state_ids.retain(|i| *i != nfa.last_state_id()); + for alternative_state_id in alternative_state_ids { + nfa.prepend(|last_state_id| NfaState::Split(last_state_id, alternative_state_id)); } Ok(true) } Rule::Seq(elements) => { let mut result = false; for element in elements.into_iter().rev() { - if expand_rule(element, nfa, next_state_index, is_sep)? { + if expand_rule(element, nfa, next_state_id, is_sep)? 
{ result = true; } - next_state_index = nfa.last_state(); + next_state_id = nfa.last_state_id(); } Ok(result) } Rule::Repeat(rule) => { nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_index = nfa.last_state(); - if expand_rule(rule, nfa, split_index, is_sep)? { - nfa.states[split_index as usize] = NfaState::Split(nfa.last_state(), next_state_index); + let split_state_id = nfa.last_state_id(); + if expand_rule(rule, nfa, split_state_id, is_sep)? { + nfa.states[split_state_id as usize] = NfaState::Split(nfa.last_state_id(), next_state_id); Ok(true) } else { Ok(false) @@ -205,10 +209,11 @@ fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_index: u32, is_sep: bo } } -pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result { +pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { let mut nfa = Nfa::new(); let separator_rule = if grammar.separators.len() > 0 { + grammar.separators.push(Rule::Blank); Rule::repeat(Rule::choice(grammar.separators)) } else { Rule::Blank @@ -222,18 +227,18 @@ pub(super) fn expand_tokens(grammar: ExtractedLexicalGrammar) -> Result(nfa: &'a Nfa, s: &'a str) -> Option<&'a str> { + fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> { + let start_states = grammar.variables.iter().map(|v| v.start_state).collect(); + let mut cursor = NfaCursor::new(&grammar.nfa, start_states); + let mut result = None; - let mut char_count = 0; - let mut cursor = NfaCursor::new(nfa); + let mut start_char = 0; + let mut end_char = 0; for c in s.chars() { - if cursor.finished_ids().count() > 0 { - result = Some(&s[0..char_count]); + if let Some(id) = cursor.finished_id() { + result = Some((id, &s[start_char..end_char])); } if cursor.advance(c) { - char_count += 1; + end_char += 1; + if cursor.in_separator() { + start_char = end_char; + } } else { break; } } + + if let Some(id) = cursor.finished_id() { + result = Some((id, &s[start_char..end_char])); + } + 
result } @@ -267,63 +283,74 @@ mod tests { fn test_rule_expansion() { struct Row { rules: Vec, - examples: Vec<(&'static str, Option<&'static str>)>, + separators: Vec, + examples: Vec<(&'static str, Option<(usize, &'static str)>)>, } let table = [ - Row { - rules: vec![Rule::pattern("a|bc")], - examples: vec![ - ("a12", Some("a")), - ("bc12", Some("bc")), - ("b12", None), - ("c12", None), - ], - }, + // regex with sequences and alternatives Row { rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")], + separators: vec![], examples: vec![ - ("ade1", Some("ade")), - ("bdf1", Some("bdf")), - ("bdfh1", Some("bdfh")), + ("ade1", Some((0, "ade"))), + ("bdf1", Some((0, "bdf"))), + ("bdfh1", Some((0, "bdfh"))), ("ad1", None), ], }, + + // regex with repeats Row { rules: vec![Rule::pattern("a*")], + separators: vec![], examples: vec![ - ("aaa1", Some("aaa")), - ("b", Some("")), + ("aaa1", Some((0, "aaa"))), + ("b", Some((0, ""))), ], }, + + // regex with repeats in sequences Row { rules: vec![Rule::pattern("a((bc)+|(de)*)f")], + separators: vec![], examples: vec![ - ("af1", Some("af")), - ("adedef1", Some("adedef")), - ("abcbcbcf1", Some("abcbcbcf")), + ("af1", Some((0, "af"))), + ("adedef1", Some((0, "adedef"))), + ("abcbcbcf1", Some((0, "abcbcbcf"))), ("a", None), ], }, + + // regex with character ranges Row { rules: vec![Rule::pattern("[a-fA-F0-9]+")], + separators: vec![], examples: vec![ - ("A1ff0", Some("A1ff")), + ("A1ff0.", Some((0, "A1ff0"))), ], }, + + // regex with perl character classes Row { rules: vec![Rule::pattern("\\w\\d\\s")], + separators: vec![], examples: vec![ - ("_0 ", Some("_0 ")), + ("_0 ", Some((0, "_0 "))), ], }, + + // string Row { rules: vec![Rule::string("abc")], + separators: vec![], examples: vec![ - ("abcd", Some("abc")), + ("abcd", Some((0, "abc"))), ("ab", None) ], }, + + // complex rule containing strings and regexes Row { rules: vec![ Rule::repeat(Rule::seq(vec![ @@ -332,17 +359,64 @@ mod tests { Rule::string("}"), ])), ], + separators: 
vec![], examples: vec![ - ("{a}{", Some("{a}")), - ("{a}{d", Some("{a}")), + ("{a}{", Some((0, "{a}"))), + ("{a}{d", Some((0, "{a}"))), ("ab", None), ], }, + + // longest match rule + Row { + rules: vec![ + Rule::pattern("a|bc"), + Rule::pattern("aa"), + Rule::pattern("bcd"), + ], + separators: vec![], + examples: vec![ + ("a.", Some((0, "a"))), + ("bc.", Some((0, "bc"))), + ("aa.", Some((1, "aa"))), + ("bcd?", Some((2, "bcd"))), + ("b.", None), + ("c.", None), + ], + }, + + // regexes with alternatives including the empty string + Row { + rules: vec![Rule::pattern("a(b|)+c")], + separators: vec![], + examples: vec![ + ("ac.", Some((0, "ac"))), + ("abc.", Some((0, "abc"))), + ("abbc.", Some((0, "abbc"))), + ], + }, + + // separators + Row { + rules: vec![ + Rule::pattern("[a-f]+"), + ], + separators: vec![ + Rule::string("\\\n"), + Rule::pattern("\\s"), + ], + examples: vec![ + (" a", Some((0, "a"))), + (" \nb", Some((0, "b"))), + (" \\a", None), + (" \\\na", Some((0, "a"))), + ], + }, ]; - for Row { rules, examples } in &table { + for Row { rules, separators, examples } in &table { let grammar = expand_tokens(ExtractedLexicalGrammar { - separators: vec![], + separators: separators.clone(), variables: rules .into_iter() .map(|rule| Variable::named("", rule.clone())) @@ -350,7 +424,7 @@ mod tests { }).unwrap(); for (haystack, needle) in examples.iter() { - assert_eq!(simulate_nfa(&grammar.nfa, haystack), *needle); + assert_eq!(simulate_nfa(&grammar, haystack), *needle); } } } From 5fa586f7c92916db288e258c91a0424e3af04f30 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 12 Dec 2018 21:01:41 -0800 Subject: [PATCH 060/208] Format expand_tokens file --- src/prepare_grammar/expand_tokens.rs | 281 +++++++++++++-------------- 1 file changed, 130 insertions(+), 151 deletions(-) diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 8b8cd03a..7a1d2f4d 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ 
b/src/prepare_grammar/expand_tokens.rs @@ -5,37 +5,98 @@ use crate::nfa::{CharacterSet, Nfa, NfaState}; use crate::rules::Rule; use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind}; -fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet { - match item { - ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'), - ClassPerlKind::Space => CharacterSet::empty() - .add_char(' ') - .add_char('\t') - .add_char('\r') - .add_char('\n'), - ClassPerlKind::Word => CharacterSet::empty() - .add_char('_') - .add_range('A', 'Z') - .add_range('a', 'z') - .add_range('0', '9'), +pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { + let mut nfa = Nfa::new(); + + let separator_rule = if grammar.separators.len() > 0 { + grammar.separators.push(Rule::Blank); + Rule::repeat(Rule::choice(grammar.separators)) + } else { + Rule::Blank + }; + + let mut variables = Vec::new(); + for (i, variable) in grammar.variables.into_iter().enumerate() { + let is_immediate_token = match &variable.rule { + Rule::Metadata { params, .. 
} => params.is_main_token, + _ => false, + }; + + nfa.states.push(NfaState::Accept(i)); + let last_state_id = nfa.last_state_id(); + expand_rule(&variable.rule, &mut nfa, last_state_id, false)?; + + if !is_immediate_token { + let last_state_id = nfa.last_state_id(); + expand_rule(&separator_rule, &mut nfa, last_state_id, true)?; + } + + variables.push(LexicalVariable { + name: variable.name, + kind: variable.kind, + start_state: nfa.last_state_id(), + }); } + + Ok(LexicalGrammar { nfa, variables }) } -fn expand_character_class(item: &ClassSetItem) -> Result { - match item { - ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), - ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), - ClassSetItem::Range(range) => { - Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)) +fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) -> Result { + match rule { + Rule::Pattern(s) => { + let ast = parse::Parser::new() + .parse(&s) + .map_err(|e| Error::GrammarError(e.to_string()))?; + expand_regex(&ast, nfa, next_state_id, is_sep) } - ClassSetItem::Union(union) => { - let mut result = CharacterSet::empty(); - for item in &union.items { - result = result.add(expand_character_class(&item)?); + Rule::String(s) => { + for c in s.chars().rev() { + nfa.prepend(|last_state_id| NfaState::Advance { + chars: CharacterSet::empty().add_char(c), + state_id: last_state_id, + is_sep, + }); + } + Ok(s.len() > 0) + } + Rule::Choice(elements) => { + let mut alternative_state_ids = Vec::new(); + for element in elements { + if expand_rule(element, nfa, next_state_id, is_sep)? 
{ + alternative_state_ids.push(nfa.last_state_id()); + } else { + alternative_state_ids.push(next_state_id); + } + } + alternative_state_ids.retain(|i| *i != nfa.last_state_id()); + for alternative_state_id in alternative_state_ids { + nfa.prepend(|last_state_id| NfaState::Split(last_state_id, alternative_state_id)); + } + Ok(true) + } + Rule::Seq(elements) => { + let mut result = false; + for element in elements.into_iter().rev() { + if expand_rule(element, nfa, next_state_id, is_sep)? { + result = true; + } + next_state_id = nfa.last_state_id(); } Ok(result) } - _ => Err(Error::regex("Unsupported character class syntax")), + Rule::Repeat(rule) => { + nfa.states.push(NfaState::Accept(0)); // Placeholder for split + let split_state_id = nfa.last_state_id(); + if expand_rule(rule, nfa, split_state_id, is_sep)? { + nfa.states[split_state_id as usize] = + NfaState::Split(nfa.last_state_id(), next_state_id); + Ok(true) + } else { + Ok(false) + } + } + Rule::Blank => Ok(false), + _ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))), } } @@ -149,107 +210,45 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) } } -fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) -> Result { - match rule { - Rule::Pattern(s) => { - let ast = parse::Parser::new() - .parse(&s) - .map_err(|e| Error::GrammarError(e.to_string()))?; - expand_regex(&ast, nfa, next_state_id, is_sep) +fn expand_character_class(item: &ClassSetItem) -> Result { + match item { + ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), + ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), + ClassSetItem::Range(range) => { + Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)) } - Rule::String(s) => { - for c in s.chars().rev() { - nfa.prepend(|last_state_id| { - NfaState::Advance { - chars: CharacterSet::empty().add_char(c), - state_id: last_state_id, - is_sep, - } - }); - } - Ok(s.len() > 0) - } - 
Rule::Choice(elements) => { - let mut alternative_state_ids = Vec::new(); - for element in elements { - if expand_rule(element, nfa, next_state_id, is_sep)? { - alternative_state_ids.push(nfa.last_state_id()); - } else { - alternative_state_ids.push(next_state_id); - } - } - alternative_state_ids.retain(|i| *i != nfa.last_state_id()); - for alternative_state_id in alternative_state_ids { - nfa.prepend(|last_state_id| NfaState::Split(last_state_id, alternative_state_id)); - } - Ok(true) - } - Rule::Seq(elements) => { - let mut result = false; - for element in elements.into_iter().rev() { - if expand_rule(element, nfa, next_state_id, is_sep)? { - result = true; - } - next_state_id = nfa.last_state_id(); + ClassSetItem::Union(union) => { + let mut result = CharacterSet::empty(); + for item in &union.items { + result = result.add(expand_character_class(&item)?); } Ok(result) } - Rule::Repeat(rule) => { - nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_state_id = nfa.last_state_id(); - if expand_rule(rule, nfa, split_state_id, is_sep)? { - nfa.states[split_state_id as usize] = NfaState::Split(nfa.last_state_id(), next_state_id); - Ok(true) - } else { - Ok(false) - } - } - Rule::Blank => Ok(false), - _ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))), + _ => Err(Error::regex("Unsupported character class syntax")), } } -pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { - let mut nfa = Nfa::new(); - - let separator_rule = if grammar.separators.len() > 0 { - grammar.separators.push(Rule::Blank); - Rule::repeat(Rule::choice(grammar.separators)) - } else { - Rule::Blank - }; - - let mut variables = Vec::new(); - for (i, variable) in grammar.variables.into_iter().enumerate() { - let is_immediate_token = match &variable.rule { - Rule::Metadata { params, .. 
} => params.is_main_token, - _ => false, - }; - - nfa.states.push(NfaState::Accept(i)); - let last_state_id = nfa.last_state_id(); - expand_rule(&variable.rule, &mut nfa, last_state_id, false)?; - - if !is_immediate_token { - let last_state_id = nfa.last_state_id(); - expand_rule(&separator_rule, &mut nfa, last_state_id, true)?; - } - - variables.push(LexicalVariable { - name: variable.name, - kind: variable.kind, - start_state: nfa.last_state_id(), - }); +fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet { + match item { + ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'), + ClassPerlKind::Space => CharacterSet::empty() + .add_char(' ') + .add_char('\t') + .add_char('\r') + .add_char('\n'), + ClassPerlKind::Word => CharacterSet::empty() + .add_char('_') + .add_range('A', 'Z') + .add_range('a', 'z') + .add_range('0', '9'), } - - Ok(LexicalGrammar { nfa, variables }) } #[cfg(test)] mod tests { use super::*; - use crate::nfa::NfaCursor; use crate::grammars::Variable; + use crate::nfa::NfaCursor; fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> { let start_states = grammar.variables.iter().map(|v| v.start_state).collect(); @@ -299,17 +298,12 @@ mod tests { ("ad1", None), ], }, - // regex with repeats Row { rules: vec![Rule::pattern("a*")], separators: vec![], - examples: vec![ - ("aaa1", Some((0, "aaa"))), - ("b", Some((0, ""))), - ], + examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))], }, - // regex with repeats in sequences Row { rules: vec![Rule::pattern("a((bc)+|(de)*)f")], @@ -321,44 +315,31 @@ mod tests { ("a", None), ], }, - // regex with character ranges Row { rules: vec![Rule::pattern("[a-fA-F0-9]+")], separators: vec![], - examples: vec![ - ("A1ff0.", Some((0, "A1ff0"))), - ], + examples: vec![("A1ff0.", Some((0, "A1ff0")))], }, - // regex with perl character classes Row { rules: vec![Rule::pattern("\\w\\d\\s")], separators: vec![], - examples: vec![ - ("_0 ", Some((0, 
"_0 "))), - ], + examples: vec![("_0 ", Some((0, "_0 ")))], }, - // string Row { rules: vec![Rule::string("abc")], separators: vec![], - examples: vec![ - ("abcd", Some((0, "abc"))), - ("ab", None) - ], + examples: vec![("abcd", Some((0, "abc"))), ("ab", None)], }, - // complex rule containing strings and regexes Row { - rules: vec![ - Rule::repeat(Rule::seq(vec![ - Rule::string("{"), - Rule::pattern("[a-f]+"), - Rule::string("}"), - ])), - ], + rules: vec![Rule::repeat(Rule::seq(vec![ + Rule::string("{"), + Rule::pattern("[a-f]+"), + Rule::string("}"), + ]))], separators: vec![], examples: vec![ ("{a}{", Some((0, "{a}"))), @@ -366,7 +347,6 @@ mod tests { ("ab", None), ], }, - // longest match rule Row { rules: vec![ @@ -384,8 +364,7 @@ mod tests { ("c.", None), ], }, - - // regexes with alternatives including the empty string + // regex with an alternative including the empty string Row { rules: vec![Rule::pattern("a(b|)+c")], separators: vec![], @@ -395,16 +374,10 @@ mod tests { ("abbc.", Some((0, "abbc"))), ], }, - // separators Row { - rules: vec![ - Rule::pattern("[a-f]+"), - ], - separators: vec![ - Rule::string("\\\n"), - Rule::pattern("\\s"), - ], + rules: vec![Rule::pattern("[a-f]+")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], examples: vec![ (" a", Some((0, "a"))), (" \nb", Some((0, "b"))), @@ -414,14 +387,20 @@ mod tests { }, ]; - for Row { rules, separators, examples } in &table { + for Row { + rules, + separators, + examples, + } in &table + { let grammar = expand_tokens(ExtractedLexicalGrammar { separators: separators.clone(), variables: rules .into_iter() .map(|rule| Variable::named("", rule.clone())) .collect(), - }).unwrap(); + }) + .unwrap(); for (haystack, needle) in examples.iter() { assert_eq!(simulate_nfa(&grammar, haystack), *needle); From 494329c93b4c54b583e68634132e1f45b383e91f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 10:08:25 -0800 Subject: [PATCH 061/208] Add Parser.set_included_ranges and 
Node.range --- src/lib.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ad31d3c4..98d2234e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,14 @@ pub struct Point { pub column: usize, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Range { + pub start_byte: usize, + pub end_byte: usize, + pub start_point: Point, + pub end_point: Point, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { pub start_byte: usize, @@ -252,6 +260,14 @@ impl Parser { unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } } + pub fn set_included_ranges(&mut self, ranges: &[Range]) { + let ts_ranges: Vec = + ranges.iter().cloned().map(|range| range.into()).collect(); + unsafe { + ffi::ts_parser_set_included_ranges(self.0, ts_ranges.as_ptr(), ts_ranges.len() as u32) + }; + } + fn parse_utf8_ptr (*const u8, usize)>( &mut self, input: &mut T, @@ -421,6 +437,15 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_end_byte(self.0) as usize } } + pub fn range(&self) -> Range { + Range { + start_byte: self.start_byte(), + end_byte: self.end_byte(), + start_point: self.start_position(), + end_point: self.end_position(), + } + } + pub fn start_position(&self) -> Point { let result = unsafe { ffi::ts_node_start_point(self.0) }; result.into() @@ -677,6 +702,17 @@ impl From for Point { } } +impl Into for Range { + fn into(self) -> ffi::TSRange { + ffi::TSRange { + start_byte: self.start_byte as u32, + end_byte: self.end_byte as u32, + start_point: self.start_point.into(), + end_point: self.end_point.into(), + } + } +} + impl PropertySheet

{ pub fn new(language: Language, json: &str) -> Result { #[derive(Deserialize, Debug)] From 4a361fbb3fafa41ffa1247501f8199938e5aab6c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 10:08:50 -0800 Subject: [PATCH 062/208] Implement Copy for Node --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 98d2234e..428e8101 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,6 +77,7 @@ pub struct PropertySheet> { text_regexes: Vec, } +#[derive(Clone, Copy)] pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); pub struct Parser(*mut ffi::TSParser); From bdd3f20522eefe01831ad9cd74002dfe95de20d1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:30:40 -0800 Subject: [PATCH 063/208] Add PropertySheet::map method --- src/lib.rs | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 428e8101..0a53e320 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ pub enum PropertySheetError { InvalidRegex(regex::Error) } -pub struct PropertySheet> { +pub struct PropertySheet

> { states: Vec, property_sets: Vec

, text_regexes: Vec, @@ -86,7 +86,7 @@ pub struct Tree(*mut ffi::TSTree); pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); -pub struct TreePropertyCursor<'a, P: 'a + DeserializeOwned> { +pub struct TreePropertyCursor<'a, P> { cursor: TreeCursor<'a>, state_stack: Vec, child_index_stack: Vec, @@ -370,7 +370,7 @@ impl Tree { self.root_node().walk() } - pub fn walk_with_properties<'a, P: DeserializeOwned>( + pub fn walk_with_properties<'a, P>( &'a self, property_sheet: &'a PropertySheet

, source: &'a str, @@ -574,7 +574,7 @@ impl<'a> Drop for TreeCursor<'a> { } } -impl<'a, P: DeserializeOwned> TreePropertyCursor<'a, P> { +impl<'a, P> TreePropertyCursor<'a, P> { fn new(tree: &'a Tree, property_sheet: &'a PropertySheet

, source: &'a str) -> Self { let mut result = Self { cursor: tree.root_node().walk(), @@ -714,8 +714,11 @@ impl Into for Range { } } -impl PropertySheet

{ - pub fn new(language: Language, json: &str) -> Result { +impl

PropertySheet

{ + pub fn new(language: Language, json: &str) -> Result + where + P: DeserializeOwned, + { #[derive(Deserialize, Debug)] struct PropertyTransitionJSON { #[serde(rename = "type")] @@ -787,6 +790,21 @@ impl PropertySheet

{ text_regexes, }) } + + pub fn map(self, mut f: F) -> Result, E> + where + F: FnMut(P) -> Result, + { + let mut property_sets = Vec::with_capacity(self.property_sets.len()); + for set in self.property_sets { + property_sets.push(f(set)?); + } + Ok(PropertySheet { + states: self.states, + text_regexes: self.text_regexes, + property_sets, + }) + } } #[cfg(test)] From 6d3835d292e7bc37965ad5623c3688c4862ee4b1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:32:10 -0800 Subject: [PATCH 064/208] Add Node::children method --- src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 0a53e320..f1a83203 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -465,6 +465,12 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_child_count(self.0) as usize } } + pub fn children<'a>(&'a self) -> impl Iterator> + 'a { + (0..self.child_count()) + .into_iter() + .map(move |i| self.child(i).unwrap()) + } + pub fn named_child<'a>(&'a self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) } From 3f1fc65a2736a573920c4139a844d99187ebb894 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:32:22 -0800 Subject: [PATCH 065/208] Auto-format lib.rs --- src/lib.rs | 59 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f1a83203..65a57d16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,12 +2,12 @@ mod ffi; #[macro_use] extern crate serde_derive; -extern crate serde_json; extern crate regex; extern crate serde; +extern crate serde_json; -use serde::de::DeserializeOwned; use regex::Regex; +use serde::de::DeserializeOwned; use std::collections::HashMap; use std::ffi::CStr; use std::fmt; @@ -68,7 +68,7 @@ struct PropertyState { #[derive(Debug)] pub enum PropertySheetError { InvalidJSON(serde_json::Error), - InvalidRegex(regex::Error) + InvalidRegex(regex::Error), } pub struct PropertySheet

> { @@ -187,7 +187,16 @@ impl Parser { pub fn parse_str(&mut self, input: &str, old_tree: Option<&Tree>) -> Option { let bytes = input.as_bytes(); - self.parse_utf8(&mut |offset, _| &bytes[offset..], old_tree) + self.parse_utf8( + &mut |offset, _| { + if offset < bytes.len() { + &bytes[offset..] + } else { + &[] + } + }, + old_tree, + ) } pub fn parse_utf8<'a, T: FnMut(usize, Point) -> &'a [u8]>( @@ -565,7 +574,8 @@ impl<'a> TreeCursor<'a> { } pub fn goto_first_child_for_index(&mut self, index: usize) -> Option { - let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; + let result = + unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; if result < 0 { None } else { @@ -645,7 +655,12 @@ impl<'a, P> TreePropertyCursor<'a, P> { } } - fn next_state(&self, state: &PropertyState, node_kind_id: u16, node_child_index: usize) -> usize { + fn next_state( + &self, + state: &PropertyState, + node_kind_id: u16, + node_child_index: usize, + ) -> usize { state .transitions .get(&node_kind_id) @@ -748,8 +763,8 @@ impl

PropertySheet

{ property_sets: Vec

, } - let input: PropertySheetJSON

= serde_json::from_str(json) - .map_err(PropertySheetError::InvalidJSON)?; + let input: PropertySheetJSON

= + serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?; let mut states = Vec::new(); let mut text_regexes = Vec::new(); let mut text_regex_patterns = Vec::new(); @@ -759,11 +774,15 @@ impl

PropertySheet

{ let node_kind_count = language.node_kind_count(); for transition in state.transitions.iter() { let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() { - if let Some(index) = text_regex_patterns.iter().position(|r| *r == regex_pattern) { + if let Some(index) = + text_regex_patterns.iter().position(|r| *r == regex_pattern) + { Some(index) } else { text_regex_patterns.push(regex_pattern); - text_regexes.push(Regex::new(®ex_pattern).map_err(PropertySheetError::InvalidRegex)?); + text_regexes.push( + Regex::new(®ex_pattern).map_err(PropertySheetError::InvalidRegex)?, + ); Some(text_regexes.len() - 1) } } else { @@ -771,9 +790,8 @@ impl

PropertySheet

{ }; for i in 0..(node_kind_count as u16) { - if - transition.kind == language.node_kind_for_id(i) && - transition.named == language.node_kind_is_named(i) + if transition.kind == language.node_kind_for_id(i) + && transition.named == language.node_kind_is_named(i) { let entry = transitions.entry(i).or_insert(Vec::new()); entry.push(PropertyTransition { @@ -928,7 +946,10 @@ mod tests { define: Option, } - let empty_properties = Properties { reference: None, define: None }; + let empty_properties = Properties { + reference: None, + define: None, + }; let property_sheet = PropertySheet::::new( rust(), @@ -1018,7 +1039,10 @@ mod tests { assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties().reference, Some("function".to_owned())); + assert_eq!( + cursor.node_properties().reference, + Some("function".to_owned()) + ); } #[test] @@ -1097,7 +1121,10 @@ mod tests { assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties().scope, Some("constructor".to_owned())); + assert_eq!( + cursor.node_properties().scope, + Some("constructor".to_owned()) + ); } #[test] From d79203f58c7e3bb06232385a6da701ed5dfde739 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:42:46 -0800 Subject: [PATCH 066/208] Add test script --- script/test.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 script/test.sh diff --git a/script/test.sh b/script/test.sh new file mode 100755 index 00000000..eb6183c0 --- /dev/null +++ b/script/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +RUST_TREE_SITTER_TEST=1 cargo test $@ From 7bd9eaa97065c3153ae44d1f219d3bfc741e82a6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Dec 2018 16:43:44 -0800 Subject: [PATCH 067/208] 0.3.5 --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fde4fd31..7f0458ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,8 
@@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.3.4" +version = "0.3.5" authors = ["Max Brunsfeld "] -build = "build.rs" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing"] From 889f232b4ca2cbdc932510bb75da6f686059eceb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 18 Dec 2018 16:05:36 -0800 Subject: [PATCH 068/208] Implement variable inlining --- Cargo.lock | 15 +- Cargo.toml | 3 +- src/build_tables/inline_variables.rs | 318 +++++++++++++++++++++++++++ src/build_tables/item.rs | 213 ++++++++++++++++-- src/build_tables/mod.rs | 1 + src/grammars.rs | 12 + src/main.rs | 1 + src/parse_grammar.rs | 1 - src/rules.rs | 34 ++- 9 files changed, 567 insertions(+), 31 deletions(-) create mode 100644 src/build_tables/inline_variables.rs diff --git a/Cargo.lock b/Cargo.lock index d5109fb7..410580fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,11 +67,6 @@ name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "bitvec" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "blake2-rfc" version = "0.2.18" @@ -461,16 +456,17 @@ dependencies = [ name = "rust-tree-sitter-cli" version = "0.1.0" dependencies = [ - "bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", 
"serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -548,6 +544,11 @@ dependencies = [ "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "smallbitvec" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "smallvec" version = "0.6.7" @@ -729,7 +730,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" -"checksum bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e37e2176261200377c7cde4c6de020394174df556c356f965e4bc239f5ce1c5a" "checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" @@ -787,6 +787,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" "checksum serde_derive 
1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" "checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" +"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" "checksum smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b73ea3738b47563803ef814925e69be00799a8c07420be8b996f8e98fb2336db" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" diff --git a/Cargo.toml b/Cargo.toml index 93a49d2c..f3880a1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,8 @@ authors = ["Max Brunsfeld "] edition = "2018" [dependencies] -bitvec = "0.8" +lazy_static = "1.2.0" +smallbitvec = "2.3.0" clap = "2.32" dirs = "1.0.2" ignore = "0.4.4" diff --git a/src/build_tables/inline_variables.rs b/src/build_tables/inline_variables.rs new file mode 100644 index 00000000..d201519f --- /dev/null +++ b/src/build_tables/inline_variables.rs @@ -0,0 +1,318 @@ +use super::item::ParseItem; +use crate::grammars::{Production, SyntaxGrammar}; +use std::collections::HashMap; + +pub(crate) struct InlinedProductionMap { + pub inlined_productions: Vec, + item_map: HashMap>, +} + +impl InlinedProductionMap { + pub fn new(grammar: &SyntaxGrammar) -> Self { + let mut result = Self { + inlined_productions: Vec::new(), + item_map: HashMap::new(), + }; + + let mut items_to_process = Vec::new(); + for (variable_index, variable) in grammar.variables.iter().enumerate() { + for production_index in 0..variable.productions.len() { + 
items_to_process.push(ParseItem::Normal { + variable_index: variable_index as u32, + production_index: production_index as u32, + step_index: 0, + }); + while !items_to_process.is_empty() { + let mut i = 0; + while i < items_to_process.len() { + let item = &items_to_process[i]; + if let Some(step) = item.step(grammar, &result) { + if grammar.variables_to_inline.contains(&step.symbol) { + let inlined_items = result + .inline(*item, grammar) + .into_iter() + .map(|production_index| ParseItem::Inlined { + variable_index: item.variable_index(), + production_index: *production_index, + step_index: item.step_index() as u32, + }) + .collect::>(); + items_to_process.splice(i..i + 1, inlined_items); + } else { + items_to_process[i] = item.successor(); + i += 1; + } + } else { + items_to_process.remove(i); + } + } + } + } + } + + result + } + + pub fn inlined_items<'a>( + &'a self, + item: ParseItem, + ) -> Option + 'a> { + self.item_map.get(&item).map(|production_indices| { + production_indices + .iter() + .cloned() + .map(move |production_index| ParseItem::Inlined { + variable_index: item.variable_index(), + production_index, + step_index: item.step_index() as u32, + }) + }) + } + + fn inline(&mut self, item: ParseItem, grammar: &SyntaxGrammar) -> &Vec { + let step_index = item.step_index(); + let mut productions_to_add = grammar.variables + [item.step(grammar, self).unwrap().symbol.index] + .productions + .clone(); + + let mut i = 0; + while i < productions_to_add.len() { + if let Some(first_symbol) = productions_to_add[i].first_symbol() { + if grammar.variables_to_inline.contains(&first_symbol) { + // Remove the production from the vector, replacing it with a placeholder. + let production = productions_to_add + .splice(i..i + 1, [Production::default()].iter().cloned()) + .next() + .unwrap(); + + // Replace the placeholder with the inlined productions. 
+ productions_to_add.splice( + i..i + 1, + grammar.variables[first_symbol.index] + .productions + .iter() + .map(|p| { + let mut p = p.clone(); + p.steps.extend(production.steps[1..].iter().cloned()); + p + }), + ); + continue; + } + } + i += 1; + } + + let result = productions_to_add + .into_iter() + .map(|production_to_add| { + let mut inlined_production = item.production(grammar, &self).clone(); + inlined_production.steps.splice( + step_index..step_index + 1, + production_to_add.steps.iter().cloned(), + ); + self.inlined_productions + .iter() + .position(|p| *p == inlined_production) + .unwrap_or({ + self.inlined_productions.push(inlined_production); + self.inlined_productions.len() - 1 + }) as u32 + }) + .collect(); + + self.item_map.entry(item).or_insert(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::{ProductionStep, SyntaxVariable, VariableType}; + use crate::rules::Symbol; + + #[test] + fn test_basic_inlining() { + let grammar = SyntaxGrammar { + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + variables_to_inline: vec![Symbol::non_terminal(1)], + variables: vec![ + SyntaxVariable { + name: "var0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ], + }], + }, + SyntaxVariable { + name: "var1".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(12)), + ProductionStep::new(Symbol::terminal(13)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(14))], + }, + ], + }, + ], + }; + + let inline_map = InlinedProductionMap::new(&grammar); + + // Nothing to inline at step 0. 
+ assert_eq!( + display_items( + inline_map.inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 0 + }), + &grammar, + &inline_map + ), + None + ); + + // Inlining variable 1 yields two productions. + assert_eq!( + display_items( + inline_map.inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 1 + }), + &grammar, + &inline_map + ), + Some(vec![ + "terminal-10 • terminal-12 terminal-13 terminal-11".to_string(), + "terminal-10 • terminal-14 terminal-11".to_string(), + ]) + ); + } + + #[test] + fn test_nested_inlining() { + let grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "var0".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), // inlined + ProductionStep::new(Symbol::terminal(12)), + ], + }, + ], + }, + SyntaxVariable { + name: "var1".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(13))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(3)), // inlined + ProductionStep::new(Symbol::terminal(14)), + ], + }, + ], + }, + SyntaxVariable { + name: "var2".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(15))], + }], + }, + SyntaxVariable { + name: "var3".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(16))], + }], + }, + ], + variables_to_inline: vec![ + Symbol::non_terminal(1), + Symbol::non_terminal(2), + Symbol::non_terminal(3), + ], + 
expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let inline_map = InlinedProductionMap::new(&grammar); + + let items = inline_map.inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 1 + }).unwrap().collect::>(); + + assert_eq!( + display_items(Some(items.iter().cloned()), &grammar, &inline_map), + Some(vec![ + "terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(), + "terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string() + ]) + ); + + let item = items[0].successor().successor(); + assert_eq!( + display_items(Some([item].iter().cloned()), &grammar, &inline_map), + Some(vec![ + "terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(), + ]) + ); + + assert_eq!( + display_items(inline_map.inlined_items(item), &grammar, &inline_map), + Some(vec![ + "terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(), + ]) + ); + } + + fn display_items( + items: Option>, + grammar: &SyntaxGrammar, + inline_map: &InlinedProductionMap, + ) -> Option> { + items.map(|items| { + items + .map(|item| format!("{}", item.with(grammar, inline_map))) + .collect() + }) + } +} diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index c8d30997..537b0928 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -1,22 +1,209 @@ -use crate::grammars::Production; +use super::inline_variables::InlinedProductionMap; +use crate::grammars::{Production, ProductionStep, SyntaxGrammar}; +use crate::rules::{Symbol, SymbolType}; +use smallbitvec::SmallBitVec; use std::collections::HashMap; -use bitvec::BitVec; +use std::hash::{Hash, Hasher}; +use std::fmt; -#[derive(Debug, PartialEq, Eq)] -pub(super) struct LookaheadSet { - terminal_bits: BitVec, - external_bits: BitVec, +lazy_static! 
{ + static ref START_PRODUCTION: Production = Production { + dynamic_precedence: 0, + steps: vec![ProductionStep { + symbol: Symbol { + index: 0, + kind: SymbolType::NonTerminal, + }, + precedence: 0, + associativity: None, + alias: None, + }], + }; +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct LookaheadSet { + terminal_bits: SmallBitVec, + external_bits: SmallBitVec, eof: bool, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub(super) struct ParseItem { - variable_index: u32, - production_index: u32, - step_index: u32, +pub(crate) enum ParseItem { + Start { + step_index: u32, + }, + Normal { + variable_index: u32, + production_index: u32, + step_index: u32, + }, + Inlined { + variable_index: u32, + production_index: u32, + step_index: u32, + }, } -#[derive(Debug, PartialEq, Eq)] -pub(super) struct ParseItemSet { - entries: HashMap +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct ParseItemSet { + pub entries: HashMap, +} + +impl LookaheadSet { + pub fn new() -> Self { + Self { + terminal_bits: SmallBitVec::new(), + external_bits: SmallBitVec::new(), + eof: false, + } + } + + pub fn insert(&mut self, other: Symbol) { + match other.kind { + SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"), + SymbolType::Terminal => self.terminal_bits.set(other.index, true), + SymbolType::External => self.external_bits.set(other.index, true), + } + } + + pub fn insert_all(&mut self, other: &LookaheadSet) -> bool { + let mut result = false; + if other.terminal_bits.len() > self.terminal_bits.len() { + self.terminal_bits.resize(other.terminal_bits.len(), false); + } + if other.external_bits.len() > self.external_bits.len() { + self.external_bits.resize(other.external_bits.len(), false); + } + for (i, element) in other.terminal_bits.iter().enumerate() { + if element { + result |= !self.terminal_bits[i]; + self.terminal_bits.set(i, element); + } + } + for (i, element) in other.external_bits.iter().enumerate() { + if 
element { + result |= !self.external_bits[i]; + self.external_bits.set(i, element); + } + } + if other.eof { + result |= !self.eof; + self.eof = true; + } + result + } +} + +impl ParseItem { + pub fn is_kernel(&self) -> bool { + match self { + ParseItem::Start { .. } => true, + ParseItem::Normal { step_index, .. } | ParseItem::Inlined { step_index, .. } => { + *step_index > 0 + } + } + } + + pub fn production<'a>( + &'a self, + grammar: &'a SyntaxGrammar, + inlined_productions: &'a InlinedProductionMap, + ) -> &'a Production { + match self { + ParseItem::Start { .. } => &START_PRODUCTION, + ParseItem::Normal { + variable_index, + production_index, + .. + } => { + &grammar.variables[*variable_index as usize].productions[*production_index as usize] + } + ParseItem::Inlined { + production_index, + .. + } => &inlined_productions.inlined_productions[*production_index as usize], + } + } + + pub fn step<'a>( + &'a self, + grammar: &'a SyntaxGrammar, + inlined_productions: &'a InlinedProductionMap, + ) -> Option<&'a ProductionStep> { + self.production(grammar, inlined_productions).steps.get(self.step_index()) + } + + pub fn variable_index(&self) -> u32 { + match self { + ParseItem::Start { .. } => panic!("Start item doesn't have a variable index"), + ParseItem::Normal { variable_index, .. } + | ParseItem::Inlined { variable_index, .. } => *variable_index, + } + } + + pub fn step_index(&self) -> usize { + match self { + ParseItem::Start { step_index } + | ParseItem::Normal { step_index, .. } + | ParseItem::Inlined { step_index, .. } => *step_index as usize, + } + } + + fn step_index_mut(&mut self) -> &mut u32 { + match self { + ParseItem::Start { step_index } + | ParseItem::Normal { step_index, .. } + | ParseItem::Inlined { step_index, .. 
} => step_index, + } + } + + pub fn with<'a>(&'a self, grammar: &'a SyntaxGrammar, inlines: &'a InlinedProductionMap) -> ParseItemDisplay<'a> { + ParseItemDisplay(self, grammar, inlines) + } + + pub fn successor(&self) -> ParseItem { + let mut result = self.clone(); + *result.step_index_mut() += 1; + result + } +} + +pub struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap); + +impl<'a> fmt::Display for ParseItemDisplay<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let step_index = self.0.step_index(); + let production = self.0.production(self.1, self.2); + for (i, step) in production.steps.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + + if i == step_index { + write!(f, "• ")?; + } + + let name = if step.symbol.is_terminal() { + "terminal" + } else if step.symbol.is_external() { + "external" + } else { + "non-terminal" + }; + + write!(f, "{}-{}", name, step.symbol.index)?; + } + Ok(()) + } +} + +impl Hash for ParseItemSet { + fn hash(&self, hasher: &mut H) { + hasher.write_usize(self.entries.len()); + for (item, lookaheads) in self.entries.iter() { + item.hash(hasher); + lookaheads.hash(hasher); + } + } } diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index c3518428..f7bb1f9c 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,4 +1,5 @@ mod item; +mod inline_variables; use std::collections::{HashMap, VecDeque}; use crate::grammars::{SyntaxGrammar, LexicalGrammar}; diff --git a/src/grammars.rs b/src/grammars.rs index 74c213e1..8abdad24 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -108,6 +108,18 @@ impl ProductionStep { } } +impl Production { + pub fn first_symbol(&self) -> Option { + self.steps.first().map(|s| s.symbol.clone()) + } +} + +impl Default for Production { + fn default() -> Self { + Production { dynamic_precedence: 0, steps: Vec::new() } + } +} + impl Variable { pub fn named(name: &str, rule: Rule) -> Self { Self { name: 
name.to_string(), kind: VariableType::Named, rule } diff --git a/src/main.rs b/src/main.rs index b83764fc..9dc9efb2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ use clap::{App, Arg, SubCommand}; #[macro_use] extern crate serde_derive; #[macro_use] extern crate serde_json; +#[macro_use] extern crate lazy_static; mod build_tables; mod error; diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs index 0f1f5008..27dc8b05 100644 --- a/src/parse_grammar.rs +++ b/src/parse_grammar.rs @@ -2,7 +2,6 @@ use serde_json::{Map, Value}; use crate::error::Result; use crate::grammars::{InputGrammar, Variable, VariableType}; use crate::rules::Rule; -use std::collections::HashMap; #[derive(Deserialize)] #[serde(tag = "type")] diff --git a/src/rules.rs b/src/rules.rs index d7234f45..9374a283 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -10,7 +10,7 @@ pub(crate) enum SymbolType { #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub(crate) enum Associativity { Left, - Right + Right, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -137,24 +137,37 @@ impl Rule { } impl Symbol { + pub fn is_terminal(&self) -> bool { + self.kind == SymbolType::Terminal + } + pub fn is_non_terminal(&self) -> bool { - return self.kind == SymbolType::NonTerminal + self.kind == SymbolType::NonTerminal } pub fn is_external(&self) -> bool { - return self.kind == SymbolType::External + self.kind == SymbolType::External } pub fn non_terminal(index: usize) -> Self { - Symbol { kind: SymbolType::NonTerminal, index } + Symbol { + kind: SymbolType::NonTerminal, + index, + } } pub fn terminal(index: usize) -> Self { - Symbol { kind: SymbolType::Terminal, index } + Symbol { + kind: SymbolType::Terminal, + index, + } } pub fn external(index: usize) -> Self { - Symbol { kind: SymbolType::External, index } + Symbol { + kind: SymbolType::External, + index, + } } } @@ -169,11 +182,14 @@ fn add_metadata(input: Rule, f: T) -> Rule { Rule::Metadata { rule, mut params } => { f(&mut params); Rule::Metadata 
{ rule, params } - }, + } _ => { let mut params = MetadataParams::default(); f(&mut params); - Rule::Metadata { rule: Box::new(input), params } + Rule::Metadata { + rule: Box::new(input), + params, + } } } } @@ -184,7 +200,7 @@ fn choice_helper(result: &mut Vec, rule: Rule) { for element in elements { choice_helper(result, element); } - }, + } _ => { if !result.contains(&rule) { result.push(rule); From 143588c148a130217beb7c547647d8e3442b9762 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 18 Dec 2018 17:31:54 -0800 Subject: [PATCH 069/208] Implement ItemSetBuilder --- src/build_tables/item.rs | 16 +- src/build_tables/item_set_builder.rs | 279 +++++++++++++++++++++++++++ src/build_tables/mod.rs | 2 + 3 files changed, 294 insertions(+), 3 deletions(-) create mode 100644 src/build_tables/item_set_builder.rs diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 537b0928..c99815eb 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -50,6 +50,8 @@ pub(crate) struct ParseItemSet { pub entries: HashMap, } +pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap); + impl LookaheadSet { pub fn new() -> Self { Self { @@ -96,6 +98,10 @@ impl LookaheadSet { } impl ParseItem { + pub fn start() -> Self { + ParseItem::Start { step_index: 0 } + } + pub fn is_kernel(&self) -> bool { match self { ParseItem::Start { .. 
} => true, @@ -106,7 +112,7 @@ impl ParseItem { } pub fn production<'a>( - &'a self, + &self, grammar: &'a SyntaxGrammar, inlined_productions: &'a InlinedProductionMap, ) -> &'a Production { @@ -127,7 +133,7 @@ impl ParseItem { } pub fn step<'a>( - &'a self, + &self, grammar: &'a SyntaxGrammar, inlined_productions: &'a InlinedProductionMap, ) -> Option<&'a ProductionStep> { @@ -169,7 +175,11 @@ impl ParseItem { } } -pub struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap); +impl ParseItemSet { + pub fn new() -> Self { + Self { entries: HashMap::new() } + } +} impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs new file mode 100644 index 00000000..61d45ded --- /dev/null +++ b/src/build_tables/item_set_builder.rs @@ -0,0 +1,279 @@ +use super::inline_variables::InlinedProductionMap; +use super::item::{LookaheadSet, ParseItem, ParseItemSet}; +use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::rules::Symbol; +use std::collections::{HashMap, HashSet}; + +#[derive(Clone, Debug, PartialEq, Eq)] +struct TransitiveClosureAddition { + item: ParseItem, + info: FollowSetInfo, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct FollowSetInfo { + lookaheads: LookaheadSet, + propagates_lookaheads: bool, +} + +pub(crate) struct ParseItemSetBuilder { + first_sets: HashMap, + last_sets: HashMap, + transitive_closure_additions: Vec>, + inlined_production_map: InlinedProductionMap, +} + +fn find_or_push(vector: &mut Vec, value: T) { + if !vector.contains(&value) { + vector.push(value); + } +} + +impl ParseItemSetBuilder { + pub fn new(syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar) -> Self { + let mut result = Self { + first_sets: HashMap::new(), + last_sets: HashMap::new(), + transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()], + 
inlined_production_map: InlinedProductionMap::new(syntax_grammar), + }; + + // For each grammar symbol, populate the FIRST and LAST sets: the set of + // terminals that appear at the beginning and end that symbol's productions, + // respectively. + // + // For a terminal symbol, the FIRST and LAST set just consists of the + // terminal itself. + for i in 0..lexical_grammar.variables.len() { + let symbol = Symbol::terminal(i); + let mut set = LookaheadSet::new(); + set.insert(symbol); + result.first_sets.insert(symbol, set.clone()); + result.last_sets.insert(symbol, set); + } + + for i in 0..syntax_grammar.external_tokens.len() { + let symbol = Symbol::external(i); + let mut set = LookaheadSet::new(); + set.insert(symbol); + result.first_sets.insert(symbol, set.clone()); + result.last_sets.insert(symbol, set); + } + + // The FIRST set of a non-terminal `i` is the union of the following sets: + // * the set of all terminals that appear at the beginings of i's productions + // * the FIRST sets of all the non-terminals that appear at the beginnings + // of i's productions + // + // Rather than computing these sets using recursion, we use an explicit stack + // called `symbols_to_process`. 
+ let mut symbols_to_process = Vec::new(); + let mut processed_non_terminals = HashSet::new(); + for i in 0..syntax_grammar.variables.len() { + let symbol = Symbol::non_terminal(i); + + let first_set = &mut result + .first_sets + .entry(symbol) + .or_insert(LookaheadSet::new()); + processed_non_terminals.clear(); + symbols_to_process.clear(); + symbols_to_process.push(symbol); + while let Some(current_symbol) = symbols_to_process.pop() { + if current_symbol.is_terminal() || current_symbol.is_external() { + first_set.insert(current_symbol); + } else if processed_non_terminals.insert(current_symbol) { + for production in syntax_grammar.variables[current_symbol.index] + .productions + .iter() + { + if let Some(step) = production.steps.first() { + symbols_to_process.push(step.symbol); + } + } + } + } + + // The LAST set is defined in a similar way to the FIRST set. + let last_set = &mut result + .last_sets + .entry(symbol) + .or_insert(LookaheadSet::new()); + processed_non_terminals.clear(); + symbols_to_process.clear(); + symbols_to_process.push(symbol); + while let Some(current_symbol) = symbols_to_process.pop() { + if current_symbol.is_terminal() || current_symbol.is_external() { + last_set.insert(current_symbol); + } else if processed_non_terminals.insert(current_symbol) { + for production in syntax_grammar.variables[current_symbol.index] + .productions + .iter() + { + if let Some(step) = production.steps.last() { + symbols_to_process.push(step.symbol); + } + } + } + } + } + + // To compute an item set's transitive closure, we find each item in the set + // whose next symbol is a non-terminal, and we add new items to the set for + // each of that symbols' productions. These productions might themselves begin + // with non-terminals, so the process continues recursively. 
In this process, + // the total set of entries that get added depends only on two things: + // * the set of non-terminal symbols that occur at each item's current position + // * the set of terminals that occurs after each of these non-terminal symbols + // + // So we can avoid a lot of duplicated recursive work by precomputing, for each + // non-terminal symbol `i`, a final list of *additions* that must be made to an + // item set when `i` occurs as the next symbol in one if its core items. The + // structure of an *addition* is as follows: + // * `item` - the new item that must be added as part of the expansion of `i` + // * `lookaheads` - lookahead tokens that can always come after that item in + // the expansion of `i` + // * `propagates_lookaheads` - a boolean indicating whether or not `item` can + // occur at the *end* of the expansion of `i`, so that i's own current + // lookahead tokens can occur after `item`. + // + // Again, rather than computing these additions recursively, we use an explicit + // stack called `entries_to_process`. + for i in 0..syntax_grammar.variables.len() { + let empty_lookaheads = LookaheadSet::new(); + let mut entries_to_process = vec![(i, &empty_lookaheads, true)]; + + // First, build up a map whose keys are all of the non-terminals that can + // appear at the beginning of non-terminal `i`, and whose values store + // information about the tokens that can follow each non-terminal. 
+ let mut follow_set_info_by_non_terminal = HashMap::new(); + while let Some(entry) = entries_to_process.pop() { + let (variable_index, lookaheads, propagates_lookaheads) = entry; + let existing_info = follow_set_info_by_non_terminal + .entry(variable_index) + .or_insert_with(|| FollowSetInfo { + lookaheads: LookaheadSet::new(), + propagates_lookaheads: false, + }); + + let did_add_follow_set_info; + if propagates_lookaheads { + did_add_follow_set_info = !existing_info.propagates_lookaheads; + existing_info.propagates_lookaheads = true; + } else { + did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads); + } + + if did_add_follow_set_info { + for production in &syntax_grammar.variables[variable_index].productions { + if let Some(symbol) = production.first_symbol() { + if symbol.is_non_terminal() { + if production.steps.len() == 1 { + entries_to_process.push(( + symbol.index, + lookaheads, + propagates_lookaheads, + )); + } else { + entries_to_process.push(( + symbol.index, + &result.first_sets[&production.steps[1].symbol], + false, + )); + } + } + } + } + } + } + + // Store all of those non-terminals' productions, along with their associated + // lookahead info, as *additions* associated with non-terminal `i`. 
+ let additions_for_non_terminal = &mut result.transitive_closure_additions[i]; + for (variable_index, follow_set_info) in follow_set_info_by_non_terminal { + let variable = &syntax_grammar.variables[variable_index]; + for production_index in 0..variable.productions.len() { + let item = ParseItem::Normal { + variable_index: variable_index as u32, + production_index: production_index as u32, + step_index: 0, + }; + + if let Some(inlined_items) = result.inlined_production_map.inlined_items(item) { + for inlined_item in inlined_items { + find_or_push( + additions_for_non_terminal, + TransitiveClosureAddition { + item: inlined_item, + info: follow_set_info.clone(), + }, + ); + } + } else { + find_or_push( + additions_for_non_terminal, + TransitiveClosureAddition { + item, + info: follow_set_info.clone(), + }, + ); + } + } + } + } + + result + } + + pub(crate) fn transitive_closure( + &mut self, + item_set: ParseItemSet, + grammar: &SyntaxGrammar, + ) -> ParseItemSet { + let mut result = ParseItemSet::new(); + for (item, lookaheads) in item_set.entries { + if let Some(items) = self.inlined_production_map.inlined_items(item) { + for item in items { + self.add_item(&mut result, item, lookaheads.clone(), grammar); + } + } else { + self.add_item(&mut result, item, lookaheads, grammar); + } + } + result + } + + fn add_item( + &self, + set: &mut ParseItemSet, + item: ParseItem, + lookaheads: LookaheadSet, + grammar: &SyntaxGrammar, + ) { + if let Some(step) = item.step(grammar, &self.inlined_production_map) { + if step.symbol.is_non_terminal() { + let next_step = item.successor().step(grammar, &self.inlined_production_map); + + // Determine which tokens can follow this non-terminal. + let following_tokens = if let Some(next_step) = next_step { + self.first_sets.get(&next_step.symbol).unwrap() + } else { + &lookaheads + }; + + // Use the pre-computed *additions* to expand the non-terminal. 
+ for addition in &self.transitive_closure_additions[step.symbol.index] { + let lookaheads = set + .entries + .entry(addition.item) + .or_insert_with(|| LookaheadSet::new()); + lookaheads.insert_all(&addition.info.lookaheads); + if addition.info.propagates_lookaheads { + lookaheads.insert_all(following_tokens); + } + } + } + } + set.entries.insert(item, lookaheads); + } +} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index f7bb1f9c..01d9219d 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,5 +1,7 @@ mod item; mod inline_variables; +mod item; +mod item_set_builder; use std::collections::{HashMap, VecDeque}; use crate::grammars::{SyntaxGrammar, LexicalGrammar}; From d078c263b0fc003c24ba2d08355fb1a87af6b65f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Dec 2018 13:35:13 -0800 Subject: [PATCH 070/208] Fix bugs in grammar JSON parsing --- Cargo.lock | 7 +++++++ Cargo.toml | 5 ++++- src/parse_grammar.rs | 23 +++++++++++++++++++---- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 410580fa..538517f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -229,6 +229,11 @@ dependencies = [ "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "indexmap" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "itoa" version = "0.4.3" @@ -539,6 +544,7 @@ name = "serde_json" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ + "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", @@ -748,6 +754,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum fuchsia-zircon-sys 0.3.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" "checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865" "checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01" +"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" "checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" diff --git a/Cargo.toml b/Cargo.toml index f3880a1c..b29bc85e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,9 @@ libloading = "0.5" rusqlite = "0.14.0" serde = "1.0" serde_derive = "1.0" -serde_json = "1.0" tree-sitter = "0.3.1" regex-syntax = "0.6.4" + +[dependencies.serde_json] +version = "1.0" +features = ["preserve_order"] diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs index 27dc8b05..07396329 100644 --- a/src/parse_grammar.rs +++ b/src/parse_grammar.rs @@ -7,6 +7,11 @@ use crate::rules::Rule; #[serde(tag = "type")] #[allow(non_camel_case_types)] enum RuleJSON { + ALIAS { + content: Box, + named: bool, + value: String, + }, BLANK, STRING { value: String, @@ -26,6 +31,13 @@ enum RuleJSON { REPEAT { content: Box, }, + REPEAT1 { + content: Box, + }, + PREC_DYNAMIC { + value: i32, + content: Box, + }, PREC_LEFT { value: i32, content: Box, @@ -41,7 +53,7 @@ enum RuleJSON { TOKEN { content: Box, }, - TOKEN_IMMEDIATE { + IMMEDIATE_TOKEN { 
content: Box, }, } @@ -97,18 +109,21 @@ pub(crate) fn parse_grammar(input: &str) -> Result { fn parse_rule(json: RuleJSON) -> Rule { match json { + RuleJSON::ALIAS { content, value, named } => Rule::alias(parse_rule(*content), value, named), RuleJSON::BLANK => Rule::Blank, RuleJSON::STRING { value } => Rule::String(value), RuleJSON::PATTERN { value } => Rule::Pattern(value), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), - RuleJSON::REPEAT { content } => Rule::repeat(parse_rule(*content)), + RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), + RuleJSON::REPEAT { content } => Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]), RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)), RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)), RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)), + RuleJSON::PREC_DYNAMIC { value, content } => Rule::prec_dynamic(value, parse_rule(*content)), RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)), - RuleJSON::TOKEN_IMMEDIATE { content } => Rule::immediate_token(parse_rule(*content)), + RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)), } } @@ -122,7 +137,7 @@ mod tests { "name": "my_lang", "rules": { "file": { - "type": "REPEAT", + "type": "REPEAT1", "content": { "type": "SYMBOL", "name": "statement" From 988dc7de35278f2ab36df90190a83c3727f391c9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Dec 2018 13:35:34 -0800 Subject: [PATCH 071/208] Handle precedence and aliases properly when inlining variables --- src/build_tables/inline_variables.rs | 263 ++++++++++++++++++++------- 1 file changed, 193 insertions(+), 70 deletions(-) diff --git 
a/src/build_tables/inline_variables.rs b/src/build_tables/inline_variables.rs index d201519f..affbe163 100644 --- a/src/build_tables/inline_variables.rs +++ b/src/build_tables/inline_variables.rs @@ -108,10 +108,25 @@ impl InlinedProductionMap { .into_iter() .map(|production_to_add| { let mut inlined_production = item.production(grammar, &self).clone(); - inlined_production.steps.splice( - step_index..step_index + 1, - production_to_add.steps.iter().cloned(), - ); + let removed_step = inlined_production + .steps + .splice( + step_index..step_index + 1, + production_to_add.steps.iter().cloned(), + ) + .next() + .unwrap(); + let inserted_steps = &mut inlined_production.steps + [step_index..step_index + production_to_add.steps.len()]; + if let Some(alias) = removed_step.alias { + for inserted_step in inserted_steps.iter_mut() { + inserted_step.alias = Some(alias.clone()); + } + } + if let Some(last_inserted_step) = inserted_steps.last_mut() { + last_inserted_step.precedence = removed_step.precedence; + last_inserted_step.associativity = removed_step.associativity; + } self.inlined_productions .iter() .position(|p| *p == inlined_production) @@ -129,8 +144,9 @@ impl InlinedProductionMap { #[cfg(test)] mod tests { use super::*; - use crate::grammars::{ProductionStep, SyntaxVariable, VariableType}; - use crate::rules::Symbol; + use crate::grammars::{LexicalGrammar, ProductionStep, SyntaxVariable, VariableType}; + use crate::rules::{Alias, Associativity, Symbol}; + use std::borrow::Borrow; #[test] fn test_basic_inlining() { @@ -142,7 +158,7 @@ mod tests { variables_to_inline: vec![Symbol::non_terminal(1)], variables: vec![ SyntaxVariable { - name: "var0".to_string(), + name: "non-terminal-0".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, @@ -154,7 +170,7 @@ mod tests { }], }, SyntaxVariable { - name: "var1".to_string(), + name: "non-terminal-1".to_string(), kind: VariableType::Named, productions: vec![ Production { @@ -176,34 
+192,32 @@ mod tests { let inline_map = InlinedProductionMap::new(&grammar); // Nothing to inline at step 0. - assert_eq!( - display_items( - inline_map.inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 0 - }), - &grammar, - &inline_map - ), - None - ); + assert!(inline_map + .inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 0 + }) + .is_none()); // Inlining variable 1 yields two productions. assert_eq!( display_items( - inline_map.inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 1 - }), + inline_map + .inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 1 + }) + .unwrap(), &grammar, &inline_map ), - Some(vec![ - "terminal-10 • terminal-12 terminal-13 terminal-11".to_string(), - "terminal-10 • terminal-14 terminal-11".to_string(), - ]) + vec![ + "non-terminal-0 → terminal-10 • terminal-12 terminal-13 terminal-11" + .to_string(), + "non-terminal-0 → terminal-10 • terminal-14 terminal-11".to_string(), + ] ); } @@ -212,23 +226,21 @@ mod tests { let grammar = SyntaxGrammar { variables: vec![ SyntaxVariable { - name: "var0".to_string(), + name: "non-terminal-0".to_string(), kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(1)), // inlined - ProductionStep::new(Symbol::terminal(11)), - ProductionStep::new(Symbol::non_terminal(2)), // inlined - ProductionStep::new(Symbol::terminal(12)), - ], - }, - ], + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), // inlined + ProductionStep::new(Symbol::terminal(12)), + ], + }], }, SyntaxVariable { - name: 
"var1".to_string(), + name: "non-terminal-1".to_string(), kind: VariableType::Named, productions: vec![ Production { @@ -245,7 +257,7 @@ mod tests { ], }, SyntaxVariable { - name: "var2".to_string(), + name: "non-terminal-2".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, @@ -253,7 +265,7 @@ mod tests { }], }, SyntaxVariable { - name: "var3".to_string(), + name: "non-terminal-3".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, @@ -274,45 +286,156 @@ mod tests { let inline_map = InlinedProductionMap::new(&grammar); - let items = inline_map.inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 1 - }).unwrap().collect::>(); + let items = inline_map + .inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 1, + }) + .unwrap() + .collect::>(); assert_eq!( - display_items(Some(items.iter().cloned()), &grammar, &inline_map), - Some(vec![ - "terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(), - "terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string() - ]) + display_items(&items, &grammar, &inline_map), + vec![ + "non-terminal-0 → terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(), + "non-terminal-0 → terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string() + ] ); let item = items[0].successor().successor(); assert_eq!( - display_items(Some([item].iter().cloned()), &grammar, &inline_map), - Some(vec![ - "terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(), - ]) + display_items(&[item], &grammar, &inline_map), + vec![ + "non-terminal-0 → terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(), + ] ); assert_eq!( - display_items(inline_map.inlined_items(item), &grammar, &inline_map), - Some(vec![ - "terminal-10 terminal-13 terminal-11 • 
terminal-15 terminal-12".to_string(), - ]) + display_items(inline_map.inlined_items(item).unwrap(), &grammar, &inline_map), + vec![ + "non-terminal-0 → terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(), + ] ); } + #[test] + fn test_inlining_with_precedence_and_alias() { + let grammar = SyntaxGrammar { + variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)) // inlined + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(2)), // inlined + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + ProductionStep::new(Symbol::terminal(12)).with_prec(3, None), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-2".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(13)) + .with_alias("outer_alias", true)], + }], + }, + ], + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let inline_map = InlinedProductionMap::new(&grammar); + + let items = inline_map + .inlined_items(ParseItem::Normal { + variable_index: 0, + production_index: 0, + step_index: 0, + }) + .unwrap() + .collect::>(); + assert_eq!( + display_items(&items, &grammar, &inline_map)[0], + "non-terminal-0 → • terminal-11 terminal-12 terminal-10 non-terminal-2".to_string(), + ); + + // The first step in the inlined production retains its precedence and alias. 
+ let item = items[0].successor(); + assert_eq!( + display_items(&[item], &grammar, &inline_map)[0], + "non-terminal-0 → terminal-11 • terminal-12 terminal-10 non-terminal-2".to_string(), + ); + assert_eq!(item.precedence(&grammar, &inline_map), 2); + assert_eq!( + items[0].step(&grammar, &inline_map).unwrap().alias, + Some(Alias { + value: "inner_alias".to_string(), + is_named: true, + }) + ); + + // The final terminal of the inlined production inherits the precedence of + // the inlined step. + let item = item.successor(); + assert_eq!( + display_items(&[item], &grammar, &inline_map)[0], + "non-terminal-0 → terminal-11 terminal-12 • terminal-10 non-terminal-2".to_string(), + ); + assert_eq!(item.precedence(&grammar, &inline_map), 1); + + let item = item.successor(); + assert_eq!( + display_items(&[item], &grammar, &inline_map)[0], + "non-terminal-0 → terminal-11 terminal-12 terminal-10 • non-terminal-2".to_string(), + ); + + // All steps of the inlined production inherit their alias from the + // inlined step. 
+ let items = inline_map.inlined_items(item).unwrap().collect::>(); + assert_eq!( + display_items(&items, &grammar, &inline_map)[0], + "non-terminal-0 → terminal-11 terminal-12 terminal-10 • terminal-13".to_string(), + ); + assert_eq!( + items[0].step(&grammar, &inline_map).unwrap().alias, + Some(Alias { + value: "outer_alias".to_string(), + is_named: true, + }) + ) + } + fn display_items( - items: Option>, + items: impl IntoIterator>, grammar: &SyntaxGrammar, inline_map: &InlinedProductionMap, - ) -> Option> { - items.map(|items| { - items - .map(|item| format!("{}", item.with(grammar, inline_map))) - .collect() - }) + ) -> Vec { + let lex = LexicalGrammar::default(); + items + .into_iter() + .map(|item| format!("{}", item.borrow().display_with(grammar, &lex, inline_map))) + .collect() } } From 5eb88069597ed72d9dd6b4f5b2ed5d772463a853 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Dec 2018 13:36:21 -0800 Subject: [PATCH 072/208] Handle repetition ranges in regexes --- src/prepare_grammar/expand_tokens.rs | 114 ++++++++++++++++++++------- 1 file changed, 86 insertions(+), 28 deletions(-) diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 7a1d2f4d..37f75e5a 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -3,7 +3,9 @@ use crate::error::{Error, Result}; use crate::grammars::{LexicalGrammar, LexicalVariable}; use crate::nfa::{CharacterSet, Nfa, NfaState}; use crate::rules::Rule; -use regex_syntax::ast::{parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind}; +use regex_syntax::ast::{ + parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, +}; pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { let mut nfa = Nfa::new(); @@ -24,7 +26,10 @@ pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result Error::RegexError(format!("Rule {} {}", variable.name, msg)), + _ => e, + 
})?; if !is_immediate_token { let last_state_id = nfa.last_state_id(); @@ -95,11 +100,62 @@ fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) Ok(false) } } + Rule::Metadata { rule, .. } => { + // TODO - implement precedence + expand_rule(rule, nfa, next_state_id, is_sep) + } Rule::Blank => Ok(false), _ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))), } } +fn expand_one_or_more(ast: &Ast, nfa: &mut Nfa, next_state_id: u32, is_sep: bool) -> Result { + nfa.states.push(NfaState::Accept(0)); // Placeholder for split + let split_state_id = nfa.last_state_id(); + if expand_regex(&ast, nfa, split_state_id, is_sep)? { + nfa.states[split_state_id as usize] = NfaState::Split(nfa.last_state_id(), next_state_id); + Ok(true) + } else { + nfa.states.pop(); + Ok(false) + } +} + +fn expand_zero_or_one(ast: &Ast, nfa: &mut Nfa, next_state_id: u32, is_sep: bool) -> Result { + if expand_regex(ast, nfa, next_state_id, is_sep)? { + nfa.prepend(|last_state_id| NfaState::Split(next_state_id, last_state_id)); + Ok(true) + } else { + Ok(false) + } +} + +fn expand_zero_or_more(ast: &Ast, nfa: &mut Nfa, next_state_id: u32, is_sep: bool) -> Result { + if expand_one_or_more(&ast, nfa, next_state_id, is_sep)? { + nfa.prepend(|last_state_id| NfaState::Split(last_state_id, next_state_id)); + Ok(true) + } else { + Ok(false) + } +} + +fn expand_count( + ast: &Ast, + count: u32, + nfa: &mut Nfa, + mut next_state_id: u32, + is_sep: bool, +) -> Result { + let mut result = false; + for _ in 0..count { + if expand_regex(ast, nfa, next_state_id, is_sep)? 
{ + result = true; + next_state_id = nfa.last_state_id(); + } + } + Ok(result) +} + fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) -> Result { match ast { Ast::Empty(_) => Ok(false), @@ -148,38 +204,36 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) }, Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { - if expand_regex(&repetition.ast, nfa, next_state_id, is_sep)? { - nfa.prepend(|last_state_id| NfaState::Split(next_state_id, last_state_id)); - Ok(true) - } else { - Ok(false) - } + expand_zero_or_one(&repetition.ast, nfa, next_state_id, is_sep) } RepetitionKind::OneOrMore => { - nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_state_id = nfa.last_state_id(); - if expand_regex(&repetition.ast, nfa, split_state_id, is_sep)? { - nfa.states[split_state_id as usize] = - NfaState::Split(nfa.last_state_id(), next_state_id); - Ok(true) - } else { - nfa.states.pop(); - Ok(false) - } + expand_one_or_more(&repetition.ast, nfa, next_state_id, is_sep) } RepetitionKind::ZeroOrMore => { - nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_state_id = nfa.last_state_id(); - if expand_regex(&repetition.ast, nfa, split_state_id, is_sep)? { - nfa.states[split_state_id as usize] = - NfaState::Split(nfa.last_state_id(), next_state_id); - nfa.prepend(|last_state_id| NfaState::Split(last_state_id, next_state_id)); - Ok(true) + expand_zero_or_more(&repetition.ast, nfa, next_state_id, is_sep) + } + RepetitionKind::Range(RepetitionRange::Exactly(count)) => { + expand_count(&repetition.ast, count, nfa, next_state_id, is_sep) + } + RepetitionKind::Range(RepetitionRange::AtLeast(min)) => { + if expand_zero_or_more(&repetition.ast, nfa, next_state_id, is_sep)? 
{ + expand_count(ast, min, nfa, next_state_id, is_sep) } else { Ok(false) } } - RepetitionKind::Range(_) => unimplemented!(), + RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => { + let mut result = expand_count(&repetition.ast, min, nfa, next_state_id, is_sep)?; + for _ in min..max { + if result { + next_state_id = nfa.last_state_id(); + } + if expand_zero_or_one(&repetition.ast, nfa, next_state_id, is_sep)? { + result = true; + } + } + Ok(result) + } }, Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.last_state_id(), is_sep), Ast::Alternation(alternation) => { @@ -202,8 +256,8 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) for ast in concat.asts.iter().rev() { if expand_regex(&ast, nfa, next_state_id, is_sep)? { result = true; + next_state_id = nfa.last_state_id(); } - next_state_id = nfa.last_state_id(); } Ok(result) } @@ -224,7 +278,11 @@ fn expand_character_class(item: &ClassSetItem) -> Result { } Ok(result) } - _ => Err(Error::regex("Unsupported character class syntax")), + ClassSetItem::Perl(class) => Ok(expand_perl_character_class(&class.kind)), + _ => Err(Error::regex(&format!( + "Unsupported character class syntax {:?}", + item + ))), } } From a3dcfa0a52b74fc56a53aef270bd9f4a474732e8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Dec 2018 13:36:39 -0800 Subject: [PATCH 073/208] Implement more of parse table generation --- src/build_tables/item.rs | 260 +++++++- src/build_tables/item_set_builder.rs | 34 +- src/build_tables/mod.rs | 596 +++++++++++++++++- src/error.rs | 1 + src/generate.rs | 4 +- src/grammars.rs | 16 +- src/js/dsl.js | 334 ++++++++++ src/main.rs | 65 +- src/nfa.rs | 6 + src/prepare_grammar/extract_simple_aliases.rs | 2 + src/prepare_grammar/extract_tokens.rs | 7 +- src/render/mod.rs | 206 +++++- src/rules.rs | 23 +- src/tables.rs | 68 +- 14 files changed, 1515 insertions(+), 107 deletions(-) create mode 100644 src/js/dsl.js diff --git a/src/build_tables/item.rs 
b/src/build_tables/item.rs index c99815eb..9208f602 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -1,10 +1,10 @@ use super::inline_variables::InlinedProductionMap; -use crate::grammars::{Production, ProductionStep, SyntaxGrammar}; -use crate::rules::{Symbol, SymbolType}; +use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; +use crate::rules::{Associativity, Symbol, SymbolType}; use smallbitvec::SmallBitVec; -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; +use std::collections::{HashMap, BTreeMap}; use std::fmt; +use std::hash::{Hash, Hasher}; lazy_static! { static ref START_PRODUCTION: Production = Production { @@ -28,7 +28,7 @@ pub(crate) struct LookaheadSet { eof: bool, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum ParseItem { Start { step_index: u32, @@ -47,10 +47,29 @@ pub(crate) enum ParseItem { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseItemSet { - pub entries: HashMap, + pub entries: BTreeMap, } -pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap); +pub(crate) struct ParseItemDisplay<'a>( + &'a ParseItem, + &'a SyntaxGrammar, + &'a LexicalGrammar, + &'a InlinedProductionMap, +); + +pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar); + +pub(crate) struct ParseItemSetDisplay<'a>( + &'a ParseItemSet, + &'a SyntaxGrammar, + &'a LexicalGrammar, + &'a InlinedProductionMap, +); + +struct ParseItemSetMapEntry(ParseItemSet, u64); +pub(crate) struct ParseItemSetMap { + map: HashMap +} impl LookaheadSet { pub fn new() -> Self { @@ -61,12 +80,61 @@ impl LookaheadSet { } } - pub fn insert(&mut self, other: Symbol) { - match other.kind { - SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"), - SymbolType::Terminal => self.terminal_bits.set(other.index, true), - 
SymbolType::External => self.external_bits.set(other.index, true), + pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + self.terminal_bits + .iter() + .enumerate() + .filter_map(|(i, value)| { + if value { + Some(Symbol::terminal(i)) + } else { + None + } + }) + .chain( + self.external_bits + .iter() + .enumerate() + .filter_map(|(i, value)| { + if value { + Some(Symbol::external(i)) + } else { + None + } + }), + ) + .chain(if self.eof { Some(Symbol::end()) } else { None }) + } + + pub fn with<'a>(symbols: impl IntoIterator) -> Self { + let mut result = Self::new(); + for symbol in symbols { + result.insert(*symbol); } + result + } + + pub fn contains(&self, symbol: &Symbol) -> bool { + match symbol.kind { + SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"), + SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false), + SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false), + SymbolType::End => self.eof, + } + } + + pub fn insert(&mut self, other: Symbol) { + let vec = match other.kind { + SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"), + SymbolType::Terminal => &mut self.terminal_bits, + SymbolType::External => &mut self.external_bits, + SymbolType::End => { + self.eof = true; + return; + } + }; + vec.resize(other.index + 1, false); + vec.set(other.index, true); } pub fn insert_all(&mut self, other: &LookaheadSet) -> bool { @@ -95,6 +163,14 @@ impl LookaheadSet { } result } + + pub fn display_with<'a>( + &'a self, + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + ) -> LookaheadSetDisplay<'a> { + LookaheadSetDisplay(self, syntax_grammar, lexical_grammar) + } } impl ParseItem { @@ -126,18 +202,53 @@ impl ParseItem { &grammar.variables[*variable_index as usize].productions[*production_index as usize] } ParseItem::Inlined { - production_index, - .. + production_index, .. 
} => &inlined_productions.inlined_productions[*production_index as usize], } } + pub fn symbol( + &self, + grammar: &SyntaxGrammar, + inlined_productions: &InlinedProductionMap, + ) -> Option { + self.step(grammar, inlined_productions).map(|s| s.symbol) + } + pub fn step<'a>( &self, grammar: &'a SyntaxGrammar, inlined_productions: &'a InlinedProductionMap, ) -> Option<&'a ProductionStep> { - self.production(grammar, inlined_productions).steps.get(self.step_index()) + self.production(grammar, inlined_productions) + .steps + .get(self.step_index()) + } + + pub fn precedence<'a>( + &self, + grammar: &'a SyntaxGrammar, + inlines: &'a InlinedProductionMap, + ) -> i32 { + self.production(grammar, inlines) + .steps + .get(self.step_index() - 1) + .map(|s| s.precedence) + .unwrap_or(0) + } + + pub fn associativity<'a>( + &self, + grammar: &'a SyntaxGrammar, + inlines: &'a InlinedProductionMap, + ) -> Option { + let production = self.production(grammar, inlines); + let step_index = self.step_index(); + if step_index == production.steps.len() { + production.steps.last().and_then(|s| s.associativity) + } else { + None + } } pub fn variable_index(&self) -> u32 { @@ -156,6 +267,14 @@ impl ParseItem { } } + pub fn is_final(&self) -> bool { + if let ParseItem::Start { step_index: 1 } = self { + true + } else { + false + } + } + fn step_index_mut(&mut self) -> &mut u32 { match self { ParseItem::Start { step_index } @@ -164,8 +283,13 @@ impl ParseItem { } } - pub fn with<'a>(&'a self, grammar: &'a SyntaxGrammar, inlines: &'a InlinedProductionMap) -> ParseItemDisplay<'a> { - ParseItemDisplay(self, grammar, inlines) + pub fn display_with<'a>( + &'a self, + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, + ) -> ParseItemDisplay<'a> { + ParseItemDisplay(self, syntax_grammar, lexical_grammar, inlines) } pub fn successor(&self) -> ParseItem { @@ -176,33 +300,107 @@ impl ParseItem { } impl ParseItemSet { - pub fn new() -> Self 
{ - Self { entries: HashMap::new() } + pub fn with<'a>(elements: impl IntoIterator) -> Self { + let mut result = Self::default(); + for (item, lookaheads) in elements { + result.entries.insert(*item, lookaheads.clone()); + } + result + } + + pub fn display_with<'a>( + &'a self, + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, + ) -> ParseItemSetDisplay<'a> { + ParseItemSetDisplay(self, syntax_grammar, lexical_grammar, inlines) + } +} + +impl Default for ParseItemSet { + fn default() -> Self { + Self { + entries: BTreeMap::new(), + } } } impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if let ParseItem::Start { .. } = &self.0 { + write!(f, "START →")?; + } else { + write!( + f, + "{} →", + &self.1.variables[self.0.variable_index() as usize].name + )?; + } + let step_index = self.0.step_index(); - let production = self.0.production(self.1, self.2); + let production = self.0.production(self.1, self.3); for (i, step) in production.steps.iter().enumerate() { - if i > 0 { - write!(f, " ")?; - } - if i == step_index { - write!(f, "• ")?; + write!(f, " •")?; } - let name = if step.symbol.is_terminal() { - "terminal" + write!(f, " ")?; + if step.symbol.is_terminal() { + if let Some(variable) = self.2.variables.get(step.symbol.index) { + write!(f, "{}", &variable.name)?; + } else { + write!(f, "{}-{}", "terminal", step.symbol.index)?; + } } else if step.symbol.is_external() { - "external" + write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?; } else { - "non-terminal" - }; + write!(f, "{}", &self.1.variables[step.symbol.index].name)?; + } + } - write!(f, "{}-{}", name, step.symbol.index)?; + if production.steps.len() == step_index { + write!(f, " •")?; + } + + Ok(()) + } +} + +impl<'a> fmt::Display for LookaheadSetDisplay<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "[")?; + for (i, symbol) 
in self.0.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + + if symbol.is_terminal() { + if let Some(variable) = self.2.variables.get(symbol.index) { + write!(f, "{}", &variable.name)?; + } else { + write!(f, "{}-{}", "terminal", symbol.index)?; + } + } else if symbol.is_external() { + write!(f, "{}", &self.1.external_tokens[symbol.index].name)?; + } else { + write!(f, "{}", &self.1.variables[symbol.index].name)?; + } + } + write!(f, "]")?; + Ok(()) + } +} + +impl<'a> fmt::Display for ParseItemSetDisplay<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + for (item, lookaheads) in self.0.entries.iter() { + writeln!( + f, + "{}\t{}", + item.display_with(self.1, self.2, self.3), + lookaheads.display_with(self.1, self.2) + )?; } Ok(()) } diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 61d45ded..530c1f25 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -20,7 +20,7 @@ pub(crate) struct ParseItemSetBuilder { first_sets: HashMap, last_sets: HashMap, transitive_closure_additions: Vec>, - inlined_production_map: InlinedProductionMap, + pub inlines: InlinedProductionMap, } fn find_or_push(vector: &mut Vec, value: T) { @@ -35,7 +35,7 @@ impl ParseItemSetBuilder { first_sets: HashMap::new(), last_sets: HashMap::new(), transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()], - inlined_production_map: InlinedProductionMap::new(syntax_grammar), + inlines: InlinedProductionMap::new(syntax_grammar), }; // For each grammar symbol, populate the FIRST and LAST sets: the set of @@ -192,6 +192,10 @@ impl ParseItemSetBuilder { let additions_for_non_terminal = &mut result.transitive_closure_additions[i]; for (variable_index, follow_set_info) in follow_set_info_by_non_terminal { let variable = &syntax_grammar.variables[variable_index]; + let non_terminal = Symbol::non_terminal(variable_index); + if 
syntax_grammar.variables_to_inline.contains(&non_terminal) { + continue; + } for production_index in 0..variable.productions.len() { let item = ParseItem::Normal { variable_index: variable_index as u32, @@ -199,7 +203,7 @@ impl ParseItemSetBuilder { step_index: 0, }; - if let Some(inlined_items) = result.inlined_production_map.inlined_items(item) { + if let Some(inlined_items) = result.inlines.inlined_items(item) { for inlined_item in inlined_items { find_or_push( additions_for_non_terminal, @@ -227,32 +231,36 @@ impl ParseItemSetBuilder { pub(crate) fn transitive_closure( &mut self, - item_set: ParseItemSet, + item_set: &ParseItemSet, grammar: &SyntaxGrammar, ) -> ParseItemSet { - let mut result = ParseItemSet::new(); - for (item, lookaheads) in item_set.entries { - if let Some(items) = self.inlined_production_map.inlined_items(item) { + let mut result = ParseItemSet::default(); + for (item, lookaheads) in &item_set.entries { + if let Some(items) = self.inlines.inlined_items(*item) { for item in items { - self.add_item(&mut result, item, lookaheads.clone(), grammar); + self.add_item(&mut result, item, lookaheads, grammar); } } else { - self.add_item(&mut result, item, lookaheads, grammar); + self.add_item(&mut result, *item, lookaheads, grammar); } } result } + pub fn first_set(&self, symbol: &Symbol) -> &LookaheadSet { + &self.first_sets[symbol] + } + fn add_item( &self, set: &mut ParseItemSet, item: ParseItem, - lookaheads: LookaheadSet, + lookaheads: &LookaheadSet, grammar: &SyntaxGrammar, ) { - if let Some(step) = item.step(grammar, &self.inlined_production_map) { + if let Some(step) = item.step(grammar, &self.inlines) { if step.symbol.is_non_terminal() { - let next_step = item.successor().step(grammar, &self.inlined_production_map); + let next_step = item.successor().step(grammar, &self.inlines); // Determine which tokens can follow this non-terminal. 
let following_tokens = if let Some(next_step) = next_step { @@ -274,6 +282,6 @@ impl ParseItemSetBuilder { } } } - set.entries.insert(item, lookaheads); + set.entries.insert(item, lookaheads.clone()); } } diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 01d9219d..091c5486 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,37 +1,611 @@ -mod item; mod inline_variables; mod item; mod item_set_builder; -use std::collections::{HashMap, VecDeque}; -use crate::grammars::{SyntaxGrammar, LexicalGrammar}; -use crate::tables::{ParseTable, LexTable, ParseStateId}; -use crate::rules::{AliasMap, Symbol}; -use crate::error::Result; -use self::item::ParseItemSet; +use self::item::{LookaheadSet, ParseItem, ParseItemSet}; +use self::item_set_builder::ParseItemSetBuilder; +use crate::error::{Error, Result}; +use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::rules::{AliasMap, Associativity, Symbol, SymbolType}; +use crate::tables::ParseTableEntry; +use crate::tables::{AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable}; +use core::ops::Range; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::fmt::Write; + +#[derive(Clone)] +struct AuxiliarySymbolInfo { + auxiliary_symbol: Symbol, + parent_symbols: Vec, +} type SymbolSequence = Vec; +type AuxiliarySymbolSequence = Vec; struct ParseStateQueueEntry { preceding_symbols: SymbolSequence, - item_set: ParseItemSet, + preceding_auxiliary_symbols: AuxiliarySymbolSequence, state_id: ParseStateId, } struct ParseTableBuilder<'a> { + item_set_builder: ParseItemSetBuilder, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, simple_aliases: &'a AliasMap, state_ids_by_item_set: HashMap, - item_sets_by_state_id: Vec<&'a ParseItemSet>, + item_sets_by_state_id: Vec, parse_state_queue: VecDeque, parse_table: ParseTable, } +impl<'a> ParseTableBuilder<'a> { + fn build(mut self) -> 
Result<(ParseTable, LexTable, LexTable, Option)> { + // Ensure that the empty rename sequence has index 0. + self.parse_table.alias_sequences.push(Vec::new()); + + // Ensure that the error state has index 0. + let error_state_id = self.add_parse_state( + &Vec::new(), + &Vec::new(), + ParseItemSet::default(), + ); + + self.add_parse_state( + &Vec::new(), + &Vec::new(), + ParseItemSet::with(&[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]), + ); + + self.process_part_state_queue()?; + self.populate_used_symbols(); + + Err(Error::grammar("oh no")) + } + + fn add_parse_state( + &mut self, + preceding_symbols: &SymbolSequence, + preceding_auxiliary_symbols: &AuxiliarySymbolSequence, + item_set: ParseItemSet, + ) -> ParseStateId { + match self.state_ids_by_item_set.entry(item_set) { + Entry::Occupied(o) => { + // eprintln!("Item set already processed at state {}", *o.get()); + *o.get() + } + Entry::Vacant(v) => { + // eprintln!("Item set not yet processed"); + let state_id = self.parse_table.states.len(); + self.item_sets_by_state_id.push(v.key().clone()); + self.parse_table.states.push(ParseState { + terminal_entries: HashMap::new(), + nonterminal_entries: HashMap::new(), + }); + self.parse_state_queue.push_back(ParseStateQueueEntry { + state_id, + preceding_symbols: preceding_symbols.clone(), + preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(), + }); + v.insert(state_id); + state_id + } + } + } + + fn process_part_state_queue(&mut self) -> Result<()> { + while let Some(entry) = self.parse_state_queue.pop_front() { + println!( + "ITEM SET {}:\n{}", + entry.state_id, + self.item_sets_by_state_id[entry.state_id].display_with( + &self.syntax_grammar, + &self.lexical_grammar, + &self.item_set_builder.inlines + ) + ); + + let item_set = self.item_set_builder.transitive_closure( + &self.item_sets_by_state_id[entry.state_id], + self.syntax_grammar, + ); + + // println!("TRANSITIVE CLOSURE:"); + // for item in item_set.entries.keys() { + // 
println!("{}", item.display_with(&self.syntax_grammar, &self.lexical_grammar, &self.item_set_builder.inlines)); + // } + // println!(""); + + self.add_actions( + entry.preceding_symbols, + entry.preceding_auxiliary_symbols, + item_set, + entry.state_id, + )?; + } + Ok(()) + } + + fn add_actions( + &mut self, + mut preceding_symbols: SymbolSequence, + mut preceding_auxiliary_symbols: Vec, + item_set: ParseItemSet, + state_id: ParseStateId, + ) -> Result<()> { + let mut terminal_successors = HashMap::new(); + let mut non_terminal_successors = HashMap::new(); + let mut lookaheads_with_conflicts = HashSet::new(); + + for (item, lookaheads) in &item_set.entries { + if let Some(next_symbol) = + item.symbol(self.syntax_grammar, &self.item_set_builder.inlines) + { + let successor = item.successor(); + if next_symbol.is_non_terminal() { + // Keep track of where auxiliary non-terminals (repeat symbols) are + // used within visible symbols. This information may be needed later + // for conflict resolution. 
+ if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() { + preceding_auxiliary_symbols + .push(self.get_auxiliary_node_info(&item_set, next_symbol)); + } + + non_terminal_successors + .entry(next_symbol) + .or_insert_with(|| ParseItemSet::default()) + .entries + .entry(successor) + .or_insert_with(|| LookaheadSet::new()) + .insert_all(lookaheads); + } else { + terminal_successors + .entry(next_symbol) + .or_insert_with(|| ParseItemSet::default()) + .entries + .entry(successor) + .or_insert_with(|| LookaheadSet::new()) + .insert_all(lookaheads); + } + } else { + let action = if item.is_final() { + ParseAction::Accept + } else { + let production = + item.production(&self.syntax_grammar, &self.item_set_builder.inlines); + ParseAction::Reduce { + symbol: Symbol::non_terminal(item.variable_index() as usize), + child_count: item.step_index(), + precedence: production.last_precedence(), + associativity: production.last_associativity(), + dynamic_precedence: production.dynamic_precedence, + alias_sequence_id: self.get_alias_sequence_id(item), + } + }; + + for lookahead in lookaheads.iter() { + let entry = self.parse_table.states[state_id] + .terminal_entries + .entry(lookahead); + let entry = entry.or_insert_with(|| ParseTableEntry::new()); + if entry.actions.is_empty() { + entry.actions.push(action); + } else if action.precedence() > entry.actions[0].precedence() { + entry.actions.clear(); + entry.actions.push(action); + lookaheads_with_conflicts.remove(&lookahead); + } else if action.precedence() == entry.actions[0].precedence() { + entry.actions.push(action); + lookaheads_with_conflicts.insert(lookahead); + } + } + } + } + + for (symbol, next_item_set) in terminal_successors { + preceding_symbols.push(symbol); + let next_state_id = self.add_parse_state( + &preceding_symbols, + &preceding_auxiliary_symbols, + next_item_set, + ); + preceding_symbols.pop(); + + let entry = self.parse_table.states[state_id] + .terminal_entries + .entry(symbol); + if let 
Entry::Occupied(e) = &entry { + if !e.get().actions.is_empty() { + lookaheads_with_conflicts.insert(symbol); + } + } + + entry + .or_insert_with(|| ParseTableEntry::new()) + .actions + .push(ParseAction::Shift { + state: next_state_id, + is_repetition: false, + }); + } + + for (symbol, next_item_set) in non_terminal_successors { + preceding_symbols.push(symbol); + let next_state_id = self.add_parse_state( + &preceding_symbols, + &preceding_auxiliary_symbols, + next_item_set, + ); + preceding_symbols.pop(); + self.parse_table.states[state_id] + .nonterminal_entries + .insert(symbol, next_state_id); + } + + for symbol in lookaheads_with_conflicts { + self.handle_conflict( + &item_set, + state_id, + &preceding_symbols, + &preceding_auxiliary_symbols, + symbol, + )?; + } + + Ok(()) + } + + fn handle_conflict( + &mut self, + item_set: &ParseItemSet, + state_id: ParseStateId, + preceding_symbols: &SymbolSequence, + preceding_auxiliary_symbols: &Vec, + conflicting_lookahead: Symbol, + ) -> Result<()> { + let entry = self.parse_table.states[state_id] + .terminal_entries + .get_mut(&conflicting_lookahead) + .unwrap(); + + // Determine which items in the set conflict with each other, and the + // precedences associated with SHIFT vs REDUCE actions. There won't + // be multiple REDUCE actions with different precedences; that is + // sorted out ahead of time in `add_actions`. But there can still be + // REDUCE-REDUCE conflicts where all actions have the *same* + // precedence, and there can still be SHIFT/REDUCE conflicts. 
+ let reduce_precedence = entry.actions[0].precedence(); + let mut considered_associativity = false; + let mut shift_precedence: Option> = None; + let mut conflicting_items = HashSet::new(); + for (item, lookaheads) in &item_set.entries { + let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines); + let step_index = item.step_index(); + if let Some(step) = production.steps.get(step_index) { + if step_index > 0 { + if self + .item_set_builder + .first_set(&step.symbol) + .contains(&conflicting_lookahead) + { + conflicting_items.insert(item); + let precedence = production.steps[step_index - 1].precedence; + if let Some(range) = &mut shift_precedence { + if precedence < range.start { + range.start = precedence; + } else if precedence > range.end { + range.end = precedence; + } + } else { + shift_precedence = Some(precedence..precedence); + } + } + } + } else if lookaheads.contains(&conflicting_lookahead) { + conflicting_items.insert(item); + } + } + + if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() { + let shift_precedence = shift_precedence.unwrap_or(0..0); + + // If all of the items in the conflict have the same parent symbol, + // and that parent symbols is auxiliary, then this is just the intentional + // ambiguity associated with a repeat rule. Resolve that class of ambiguity + // by leaving it in the parse table, but marking the SHIFT action with + // an `is_repetition` flag. + let conflicting_variable_index = + conflicting_items.iter().next().unwrap().variable_index(); + if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() { + if conflicting_items + .iter() + .all(|item| item.variable_index() == conflicting_variable_index) + { + *is_repetition = true; + return Ok(()); + } + } + + // If the SHIFT action has higher precedence, remove all the REDUCE actions. 
+ if shift_precedence.start > reduce_precedence + || (shift_precedence.start == reduce_precedence + && shift_precedence.end > reduce_precedence) + { + entry.actions.drain(0..entry.actions.len() - 1); + } + // If the REDUCE actions have higher precedence, remove the SHIFT action. + else if shift_precedence.end < reduce_precedence + || (shift_precedence.end == reduce_precedence + && shift_precedence.start < reduce_precedence) + { + entry.actions.pop(); + conflicting_items.retain(|item| { + item.step(&self.syntax_grammar, &self.item_set_builder.inlines) + .is_none() + }); + } + // If the SHIFT and REDUCE actions have the same predence, consider + // the REDUCE actions' associativity. + else if shift_precedence == (reduce_precedence..reduce_precedence) { + considered_associativity = true; + let mut has_left = false; + let mut has_right = false; + let mut has_non = false; + for action in &entry.actions { + if let ParseAction::Reduce { associativity, .. } = action { + match associativity { + Some(Associativity::Left) => has_left = true, + Some(Associativity::Right) => has_right = true, + None => has_non = true, + } + } + } + + // If all reduce actions are left associative, remove the SHIFT action. + // If all reduce actions are right associative, remove the REDUCE actions. + match (has_left, has_non, has_right) { + (true, false, false) => { + entry.actions.pop(); + conflicting_items.retain(|item| { + item.step(&self.syntax_grammar, &self.item_set_builder.inlines) + .is_none() + }); + } + (false, false, true) => { + entry.actions.drain(0..entry.actions.len() - 1); + } + _ => {} + } + } + } + + // If all of the actions but one have been eliminated, then there's no problem. + let entry = self.parse_table.states[state_id] + .terminal_entries + .get_mut(&conflicting_lookahead) + .unwrap(); + if entry.actions.len() == 1 { + return Ok(()); + } + + // Determine the set of parent symbols involved in this conflict. 
+ let mut actual_conflict = Vec::new(); + for item in &conflicting_items { + let symbol = Symbol::non_terminal(item.variable_index() as usize); + if self.syntax_grammar.variables[symbol.index].is_auxiliary() { + actual_conflict.extend( + preceding_auxiliary_symbols + .iter() + .rev() + .find_map(|info| { + if info.auxiliary_symbol == symbol { + Some(&info.parent_symbols) + } else { + None + } + }) + .unwrap() + .iter(), + ); + } else { + actual_conflict.push(symbol); + } + } + actual_conflict.sort_unstable(); + actual_conflict.dedup(); + + // If this set of symbols has been whitelisted, then there's no error. + if self + .syntax_grammar + .expected_conflicts + .contains(&actual_conflict) + { + return Ok(()); + } + + let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string(); + for symbol in preceding_symbols { + write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap(); + } + + write!( + &mut msg, + " • {} …\n\n", + self.symbol_name(&conflicting_lookahead) + ) + .unwrap(); + write!(&mut msg, "Possible interpretations:\n").unwrap(); + for (i, item) in conflicting_items.iter().enumerate() { + write!(&mut msg, "\n {}:", i).unwrap(); + + for preceding_symbol in preceding_symbols + .iter() + .take(preceding_symbols.len() - item.step_index()) + { + write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap(); + } + + write!( + &mut msg, + " ({}", + &self.syntax_grammar.variables[item.variable_index() as usize].name + ) + .unwrap(); + + for (j, step) in item + .production(&self.syntax_grammar, &self.item_set_builder.inlines) + .steps + .iter() + .enumerate() + { + if j == item.step_index() { + write!(&mut msg, " •").unwrap(); + } + write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap(); + } + + write!(&mut msg, ")").unwrap(); + + if item + .step(&self.syntax_grammar, &self.item_set_builder.inlines) + .is_none() + { + write!( + &mut msg, + " • {}", + self.symbol_name(&conflicting_lookahead) + ) + .unwrap(); + } + + let precedence = 
item.precedence(&self.syntax_grammar, &self.item_set_builder.inlines); + let associativity = + item.associativity(&self.syntax_grammar, &self.item_set_builder.inlines); + if precedence != 0 || associativity.is_some() { + write!( + &mut msg, + "(precedence: {}, associativity: {:?})", + precedence, associativity + ) + .unwrap(); + } + } + + // TODO - generate suggested resolutions + + Err(Error::ConflictError(msg)) + } + + fn get_auxiliary_node_info( + &self, + item_set: &ParseItemSet, + symbol: Symbol, + ) -> AuxiliarySymbolInfo { + let parent_symbols = item_set + .entries + .keys() + .filter_map(|item| { + if item.symbol(&self.syntax_grammar, &self.item_set_builder.inlines) == Some(symbol) + { + None + } else { + None + } + }) + .collect(); + AuxiliarySymbolInfo { + auxiliary_symbol: symbol, + parent_symbols, + } + } + + fn populate_used_symbols(&mut self) { + let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()]; + let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()]; + let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()]; + for state in &self.parse_table.states { + for symbol in state.terminal_entries.keys() { + match symbol.kind { + SymbolType::Terminal => terminal_usages[symbol.index] = true, + SymbolType::External => external_usages[symbol.index] = true, + _ => {} + } + } + for symbol in state.nonterminal_entries.keys() { + non_terminal_usages[symbol.index] = true; + } + } + for (i, value) in terminal_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::terminal(i)); + } + } + for (i, value) in non_terminal_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::non_terminal(i)); + } + } + for (i, value) in external_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::external(i)); + } + } + } + + fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { + let production = 
item.production(&self.syntax_grammar, &self.item_set_builder.inlines); + let alias_sequence = production.steps.iter().map(|s| s.alias.clone()).collect(); + if let Some(index) = self + .parse_table + .alias_sequences + .iter() + .position(|seq| *seq == alias_sequence) + { + index + } else { + self.parse_table.alias_sequences.push(alias_sequence); + self.parse_table.alias_sequences.len() - 1 + } + } + + fn symbol_name(&self, symbol: &Symbol) -> String { + match symbol.kind { + SymbolType::End => "EOF".to_string(), + SymbolType::External => self.syntax_grammar.external_tokens[symbol.index] + .name + .clone(), + SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(), + SymbolType::Terminal => { + let variable = &self.lexical_grammar.variables[symbol.index]; + if variable.kind == VariableType::Named { + variable.name.clone() + } else { + format!("\"{}\"", &variable.name) + } + } + } + } +} + pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap + simple_aliases: &AliasMap, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { - unimplemented!(); + ParseTableBuilder { + syntax_grammar, + lexical_grammar, + simple_aliases, + item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar), + state_ids_by_item_set: HashMap::new(), + item_sets_by_state_id: Vec::new(), + parse_state_queue: VecDeque::new(), + parse_table: ParseTable { + states: Vec::new(), + alias_sequences: Vec::new(), + symbols: Vec::new(), + }, + } + .build() } diff --git a/src/error.rs b/src/error.rs index 49064c22..b03efa93 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,6 +3,7 @@ pub enum Error { GrammarError(String), SymbolError(String), RegexError(String), + ConflictError(String), } pub type Result = std::result::Result; diff --git a/src/generate.rs b/src/generate.rs index 4507fb6f..dc3d5176 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -4,8 +4,8 @@ use 
crate::prepare_grammar::prepare_grammar; use crate::build_tables::build_tables; use crate::render::render_c_code; -pub fn generate_parser_for_grammar(input: String) -> Result { - let input_grammar = parse_grammar(&input)?; +pub fn generate_parser_for_grammar(input: &str) -> Result { + let input_grammar = parse_grammar(input)?; let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?; let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, diff --git a/src/grammars.rs b/src/grammars.rs index 8abdad24..7512ec03 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -38,7 +38,7 @@ pub(crate) struct LexicalVariable { pub start_state: u32, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct LexicalGrammar { pub nfa: Nfa, pub variables: Vec, @@ -112,6 +112,14 @@ impl Production { pub fn first_symbol(&self) -> Option { self.steps.first().map(|s| s.symbol.clone()) } + + pub fn last_precedence(&self) -> i32 { + self.steps.last().map(|s| s.precedence).unwrap_or(0) + } + + pub fn last_associativity(&self) -> Option { + self.steps.last().map(|s| s.associativity).unwrap_or(None) + } } impl Default for Production { @@ -137,3 +145,9 @@ impl Variable { Self { name: name.to_string(), kind: VariableType::Anonymous, rule } } } + +impl SyntaxVariable { + pub fn is_auxiliary(&self) -> bool { + self.kind == VariableType::Auxiliary + } +} diff --git a/src/js/dsl.js b/src/js/dsl.js new file mode 100644 index 00000000..ba3962cd --- /dev/null +++ b/src/js/dsl.js @@ -0,0 +1,334 @@ +const UNICODE_ESCAPE_PATTERN = /\\u([0-9a-f]{4})/gi; +const DELIMITER_ESCAPE_PATTERN = /\\\//g; + +function alias(rule, value) { + const result = { + type: "ALIAS", + content: normalize(rule), + named: false, + value: null + }; + + switch (value.constructor) { + case String: + result.named = false; + result.value = value; + return result; + case ReferenceError: + result.named = true; + 
result.value = value.symbol.name; + return result; + case Object: + if (typeof value.type === 'string' && value.type === 'SYMBOL') { + result.named = true; + result.value = value.name; + return result; + } + } + + throw new Error('Invalid alias value ' + value); +} + +function blank() { + return { + type: "BLANK" + }; +} + +function choice(...elements) { + return { + type: "CHOICE", + members: elements.map(normalize) + }; +} + +function optional(value) { + return choice(value, blank()); +} + +function prec(number, rule) { + if (rule == null) { + rule = number; + number = 0; + } + + return { + type: "PREC", + value: number, + content: normalize(rule) + }; +} + +prec.left = function(number, rule) { + if (rule == null) { + rule = number; + number = 0; + } + + return { + type: "PREC_LEFT", + value: number, + content: normalize(rule) + }; +} + +prec.right = function(number, rule) { + if (rule == null) { + rule = number; + number = 0; + } + + return { + type: "PREC_RIGHT", + value: number, + content: normalize(rule) + }; +} + +prec.dynamic = function(number, rule) { + return { + type: "PREC_DYNAMIC", + value: number, + content: normalize(rule) + }; +} + +function repeat(rule) { + return { + type: "REPEAT", + content: normalize(rule) + }; +} + +function repeat1(rule) { + return { + type: "REPEAT1", + content: normalize(rule) + }; +} + +function seq(...elements) { + return { + type: "SEQ", + members: elements.map(normalize) + }; +} + +function sym(name) { + return { + type: "SYMBOL", + name: name + }; +} + +function token(value) { + return { + type: "TOKEN", + content: normalize(value) + }; +} + +token.immediate = function(value) { + return { + type: "IMMEDIATE_TOKEN", + content: normalize(value) + }; +} + +function normalize(value) { + + if (typeof value == "undefined") + throw new Error("Undefined symbol"); + + switch (value.constructor) { + case String: + return { + type: 'STRING', + value + }; + case RegExp: + return { + type: 'PATTERN', + value: value.source + 
.replace( + DELIMITER_ESCAPE_PATTERN, + '/' + ) + .replace( + UNICODE_ESCAPE_PATTERN, + (match, group) => String.fromCharCode(parseInt(group, 16)) + ) + }; + case ReferenceError: + throw value + default: + if (typeof value.type === 'string') { + return value; + } else { + throw new TypeError("Invalid rule: " + value.toString()); + } + } +} + +function RuleBuilder(ruleMap) { + return new Proxy({}, { + get(target, propertyName) { + const symbol = { + type: 'SYMBOL', + name: propertyName + }; + + if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) { + return symbol; + } else { + const error = new ReferenceError(`Undefined symbol '${propertyName}'`); + error.symbol = symbol; + return error; + } + } + }) +} + +function grammar(baseGrammar, options) { + if (!options) { + options = baseGrammar; + baseGrammar = { + name: null, + rules: {}, + extras: [normalize(/\s/)], + conflicts: [], + externals: [], + inline: [] + }; + } + + let externals = baseGrammar.externals; + if (options.externals) { + if (typeof options.externals !== "function") { + throw new Error("Grammar's 'externals' property must be a function."); + } + + const externalsRuleBuilder = RuleBuilder(null) + const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); + + if (!Array.isArray(externalRules)) { + throw new Error("Grammar's 'externals' property must return an array of rules."); + } + + externals = externalRules.map(normalize); + } + + const ruleMap = {}; + for (const key in options.rules) { + ruleMap[key] = true; + } + for (const key in baseGrammar.rules) { + ruleMap[key] = true; + } + for (const external of externals) { + if (typeof external.name === 'string') { + ruleMap[external.name] = true; + } + } + + const ruleBuilder = RuleBuilder(ruleMap); + + const name = options.name; + if (typeof name !== "string") { + throw new Error("Grammar's 'name' property must be a string."); + } + + if (!/^[a-zA-Z_]\w*$/.test(name)) { + throw new Error("Grammar's 
'name' property must not start with a digit and cannot contain non-word characters."); + } + + let rules = Object.assign({}, baseGrammar.rules); + if (options.rules) { + if (typeof options.rules !== "object") { + throw new Error("Grammar's 'rules' property must be an object."); + } + + for (const ruleName in options.rules) { + const ruleFn = options.rules[ruleName]; + if (typeof ruleFn !== "function") { + throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not."); + } + rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName])); + } + } + + let extras = baseGrammar.extras.slice(); + if (options.extras) { + if (typeof options.extras !== "function") { + throw new Error("Grammar's 'extras' property must be a function."); + } + + extras = options.extras + .call(ruleBuilder, ruleBuilder, baseGrammar.extras) + .map(normalize); + } + + let word = baseGrammar.word; + if (options.word) { + word = options.word.call(ruleBuilder, ruleBuilder).name; + if (typeof word != 'string') { + throw new Error("Grammar's 'word' property must be a named rule."); + } + } + + let conflicts = baseGrammar.conflicts; + if (options.conflicts) { + if (typeof options.conflicts !== "function") { + throw new Error("Grammar's 'conflicts' property must be a function."); + } + + const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym)); + const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules); + + if (!Array.isArray(conflictRules)) { + throw new Error("Grammar's conflicts must be an array of arrays of rules."); + } + + conflicts = conflictRules.map(conflictSet => { + if (!Array.isArray(conflictSet)) { + throw new Error("Grammar's conflicts must be an array of arrays of rules."); + } + + return conflictSet.map(symbol => symbol.name); + }); + } + + let inline = baseGrammar.inline; + if (options.inline) { + if (typeof options.inline !== "function") { + throw new Error("Grammar's 
'inline' property must be a function."); + } + + const baseInlineRules = baseGrammar.inline.map(sym); + const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules); + + if (!Array.isArray(inlineRules)) { + throw new Error("Grammar's inline must be an array of rules."); + } + + inline = inlineRules.map(symbol => symbol.name); + } + + if (Object.keys(rules).length == 0) { + throw new Error("Grammar must have at least one rule."); + } + + return {name, word, rules, extras, conflicts, externals, inline}; + } + +global.alias = alias; +global.blank = blank; +global.choice = choice; +global.optional = optional; +global.prec = prec; +global.repeat = repeat; +global.repeat1 = repeat1; +global.seq = seq; +global.sym = sym; +global.token = token; +global.grammar = grammar; diff --git a/src/main.rs b/src/main.rs index 9dc9efb2..c7ca2ca5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,15 @@ -use clap::{App, Arg, SubCommand}; +#[macro_use] +extern crate serde_derive; +#[macro_use] +extern crate serde_json; +#[macro_use] +extern crate lazy_static; -#[macro_use] extern crate serde_derive; -#[macro_use] extern crate serde_json; -#[macro_use] extern crate lazy_static; +use std::path::PathBuf; +use clap::{App, Arg, SubCommand}; +use std::env; +use std::io::Write; +use std::process::{Command, Stdio}; mod build_tables; mod error; @@ -20,25 +27,59 @@ fn main() -> error::Result<()> { .version("0.1") .author("Max Brunsfeld ") .about("Generates and tests parsers") + .subcommand(SubCommand::with_name("generate").about("Generate a parser")) .subcommand( - SubCommand::with_name("generate") - .about("Generate a parser") - ).subcommand( SubCommand::with_name("parse") .about("Parse a file") - .arg(Arg::with_name("path").index(1)) - ).subcommand( + .arg(Arg::with_name("path").index(1)), + ) + .subcommand( SubCommand::with_name("test") .about("Run a parser's tests") .arg(Arg::with_name("path").index(1).required(true)) 
.arg(Arg::with_name("line").index(2).required(true)) - .arg(Arg::with_name("column").index(3).required(true)) - ).get_matches(); + .arg(Arg::with_name("column").index(3).required(true)), + ) + .get_matches(); if let Some(matches) = matches.subcommand_matches("generate") { - let code = generate::generate_parser_for_grammar(String::new())?; + let mut grammar_path = env::current_dir().expect("Failed to read CWD"); + grammar_path.push("grammar.js"); + let grammar_json = load_js_grammar_file(grammar_path); + let code = generate::generate_parser_for_grammar(&grammar_json)?; println!("{}", code); } Ok(()) } + +fn load_js_grammar_file(grammar_path: PathBuf) -> String { + let mut node_process = Command::new("node") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .expect("Failed to run `node`"); + + let js_prelude = include_str!("./js/dsl.js"); + let mut node_stdin = node_process + .stdin + .take() + .expect("Failed to open stdin for node"); + write!( + node_stdin, + "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n", + js_prelude, + grammar_path.to_str().unwrap() + ).expect("Failed to write to node's stdin"); + drop(node_stdin); + let output = node_process + .wait_with_output() + .expect("Failed to read output from node"); + match output.status.code() { + None => panic!("Node process was killed"), + Some(0) => {} + Some(code) => panic!(format!("Node process exited with status {}", code)), + } + + String::from_utf8(output.stdout).expect("Got invalid UTF8 from node") +} diff --git a/src/nfa.rs b/src/nfa.rs index bc084ede..f6acb67a 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -23,6 +23,12 @@ pub struct Nfa { pub states: Vec } +impl Default for Nfa { + fn default() -> Self { + Self { states: Vec::new() } + } +} + #[derive(Debug)] pub struct NfaCursor<'a> { pub(crate) state_ids: Vec, diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs index 8b87ea2e..ff7204a0 100644 --- 
a/src/prepare_grammar/extract_simple_aliases.rs +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -22,6 +22,7 @@ pub(super) fn extract_simple_aliases( Symbol { kind: SymbolType::External, index} => &mut external_status_list[index], Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index], Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index], + Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"), }; if step.alias.is_none() { @@ -49,6 +50,7 @@ pub(super) fn extract_simple_aliases( Symbol { kind: SymbolType::External, index} => &external_status_list[index], Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index], Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index], + Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"), }; if status.alias.is_some() { diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs index d53555af..eaeede90 100644 --- a/src/prepare_grammar/extract_tokens.rs +++ b/src/prepare_grammar/extract_tokens.rs @@ -67,10 +67,13 @@ pub(super) fn extract_tokens( .expected_conflicts .into_iter() .map(|conflict| { - conflict + let mut result: Vec<_> = conflict .iter() .map(|symbol| symbol_replacer.replace_symbol(*symbol)) - .collect() + .collect(); + result.sort_unstable(); + result.dedup(); + result }) .collect(); diff --git a/src/render/mod.rs b/src/render/mod.rs index 5bd11a34..2ca610a6 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,6 +1,188 @@ -use crate::rules::{Symbol, AliasMap}; -use crate::grammars::{SyntaxGrammar, LexicalGrammar}; -use crate::tables::{ParseTable, LexTable}; +use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; +use crate::tables::{LexTable, ParseTable, ParseTableEntry}; +use std::collections::{HashMap, HashSet}; +use std::fmt::Write; + +macro_rules! 
add_line { + ($this: tt, $($arg: tt)*) => { + for _ in 0..$this.indent_level { + write!(&mut $this.buffer, " ").unwrap(); + } + $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); + $this.buffer += "\n"; + } +} + +struct Generator { + buffer: String, + indent_level: usize, + + language_name: String, + parse_table: ParseTable, + main_lex_table: LexTable, + keyword_lex_table: LexTable, + keyword_capture_token: Option, + syntax_grammar: SyntaxGrammar, + lexical_grammar: LexicalGrammar, + simple_aliases: AliasMap, + symbol_ids: HashMap, + parse_table_entries: Vec<(usize, ParseTableEntry)>, + next_parse_action_list_index: usize, + unique_aliases: HashSet, +} + +impl Generator { + fn generate(mut self) -> String { + self.add_includes(); + self.add_pragmas(); + self.add_stats(); + self.add_symbol_enum(); + self.add_symbol_names_list(); + self.buffer + } + + fn add_includes(&mut self) { + add_line!(self, "#include "); + add_line!(self, ""); + } + + fn add_pragmas(&mut self) { + add_line!(self, "#if defined(__GNUC__) || defined(__clang__)"); + add_line!(self, "#pragma GCC diagnostic push"); + add_line!(self, "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""); + add_line!(self, "#endif"); + add_line!(self, ""); + + // Compiling large lexer functions can be very slow, especially when + // using Visual Studio on Windows. Disabling optimizations is not + // ideal, but only a very small fraction of overall parse time is + // spent lexing, so the performance impact of this is pretty small. 
+ if self.main_lex_table.states.len() > 500 { + add_line!(self, "#ifdef _MSC_VER"); + add_line!(self, "#pragma optimize(\"\", off)"); + add_line!(self, "#endif"); + add_line!(self, ""); + } + } + + fn add_stats(&mut self) { + let mut token_count = 0; + + for symbol in &self.parse_table.symbols { + if symbol.is_terminal() { + token_count += 1; + } else if symbol.is_external() { + let external_token = &self.syntax_grammar.external_tokens[symbol.index]; + if external_token.corresponding_internal_token.is_none() { + token_count += 1; + } + } + } + + for alias_sequence in &self.parse_table.alias_sequences { + for entry in alias_sequence { + if let Some(alias) = entry { + self.unique_aliases.insert(alias.clone()); + } + } + } + + let mut symbol_id_values = HashSet::new(); + for i in 0..self.parse_table.symbols.len() { + self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_id_values); + } + + add_line!(self, "#define LANGUAGE_VERSION {}", 6); + add_line!(self, "#define STATE_COUNT {}", self.parse_table.states.len()); + add_line!(self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len()); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len()); + add_line!(self, "#define TOKEN_COUNT {}", token_count); + add_line!(self, "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len()); + // add_line!(self, "#define MAX_ALIAS_SEQUENCE_LENGTH {}\n", self.parse_table.max_alias_sequence_length); + add_line!(self, ""); + } + + fn add_symbol_enum(&mut self) { + add_line!(self, "enum {{"); + self.indent(); + for i in 0..self.parse_table.symbols.len() { + let symbol = self.parse_table.symbols[i]; + if symbol != Symbol::end() { + add_line!(self, "{} = {}", self.symbol_ids[&symbol], i); + } + } + self.dedent(); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_symbol_names_list(&mut self) { + add_line!(self, "static const char *ts_symbol_names[] = {{"); + self.indent(); + self.dedent(); + add_line!(self, "}};"); + 
add_line!(self, ""); + } + + fn assign_symbol_id(&mut self, symbol: Symbol, used_ids: &mut HashSet) { + let mut id; + if symbol == Symbol::end() { + id = "ts_builtin_sym_end".to_string(); + } else { + let (name, kind) = self.metadata_for_symbol(symbol); + id = match kind { + VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_name(name)), + VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_name(name)), + VariableType::Hidden | VariableType::Named => { + format!("sym_{}", self.sanitize_name(name)) + } + }; + + let mut suffix_number = 1; + let mut suffix = String::new(); + while used_ids.contains(&id) { + id.drain(id.len() - suffix.len()..); + suffix_number += 1; + suffix = suffix_number.to_string(); + id += &suffix; + } + } + + used_ids.insert(id.clone()); + self.symbol_ids.insert(symbol, id); + } + + fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { + match symbol.kind { + SymbolType::End => ("end", VariableType::Auxiliary), + SymbolType::NonTerminal => { + let variable = &self.syntax_grammar.variables[symbol.index]; + (&variable.name, variable.kind) + } + SymbolType::Terminal => { + let variable = &self.lexical_grammar.variables[symbol.index]; + (&variable.name, variable.kind) + } + SymbolType::External => { + let token = &self.syntax_grammar.external_tokens[symbol.index]; + (&token.name, token.kind) + } + } + } + + fn sanitize_name(&self, name: &str) -> String { + name.to_string() + } + + fn indent(&mut self) { + self.indent_level += 1; + } + + fn dedent(&mut self) { + self.indent_level -= 1; + } +} pub(crate) fn render_c_code( name: &str, @@ -12,5 +194,21 @@ pub(crate) fn render_c_code( lexical_grammar: LexicalGrammar, simple_aliases: AliasMap, ) -> String { - unimplemented!(); + Generator { + buffer: String::new(), + indent_level: 0, + language_name: name.to_string(), + parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + syntax_grammar, + lexical_grammar, + simple_aliases, + symbol_ids: 
HashMap::new(), + parse_table_entries: Vec::new(), + next_parse_action_list_index: 0, + unique_aliases: HashSet::new(), + } + .generate() } diff --git a/src/rules.rs b/src/rules.rs index 9374a283..34f4c8b9 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -1,10 +1,11 @@ use std::collections::HashMap; -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum SymbolType { External, Terminal, NonTerminal, + End, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -33,7 +34,7 @@ pub(crate) struct MetadataParams { pub alias: Option, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct Symbol { pub kind: SymbolType, pub index: usize, @@ -56,6 +57,15 @@ pub(crate) enum Rule { } impl Rule { + pub fn alias(content: Rule, value: String, is_named: bool) -> Self { + add_metadata(content, move |params| { + params.alias = Some(Alias { + is_named, + value + }); + }) + } + pub fn token(content: Rule) -> Self { add_metadata(content, |params| { params.is_token = true; @@ -169,6 +179,13 @@ impl Symbol { index, } } + + pub fn end() -> Self { + Symbol { + kind: SymbolType::End, + index: 0, + } + } } impl From for Rule { @@ -177,7 +194,7 @@ impl From for Rule { } } -fn add_metadata(input: Rule, f: T) -> Rule { +fn add_metadata(input: Rule, f: T) -> Rule { match input { Rule::Metadata { rule, mut params } => { f(&mut params); diff --git a/src/tables.rs b/src/tables.rs index de66253c..9100b81e 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -6,20 +6,13 @@ pub(crate) type AliasSequenceId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum ParseActionType { - Error, - Shift, - Reduce, - Accept, - Recover, -} - #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum ParseAction { Accept, - Error, - Shift(ParseStateId), + Shift 
{ + state: ParseStateId, + is_repetition: bool, + }, ShiftExtra, Recover, Reduce { @@ -28,50 +21,69 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - alias_sequence_id: Option, - is_repetition: bool, + alias_sequence_id: AliasSequenceId, } } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseTableEntry { - actions: Vec, - reusable: bool, + pub actions: Vec, + pub reusable: bool, } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseState { - terminal_entries: HashMap, - nonterminal_entries: HashMap + pub terminal_entries: HashMap, + pub nonterminal_entries: HashMap } #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { - states: Vec, - alias_sequences: Vec>, + pub states: Vec, + pub symbols: Vec, + pub alias_sequences: Vec>>, } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct AdvanceAction { - state: LexStateId, - precedence: Range, - in_main_token: bool, + pub state: LexStateId, + pub precedence: Range, + pub in_main_token: bool, } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct AcceptTokenAction { - symbol: Symbol, - precedence: i32, - implicit_precedence: i32, + pub symbol: Symbol, + pub precedence: i32, + pub implicit_precedence: i32, } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct LexState { - advance_actions: HashMap, - accept_action: Option, + pub advance_actions: HashMap, + pub accept_action: Option, } #[derive(Debug, PartialEq, Eq)] pub(crate) struct LexTable { - states: Vec, + pub states: Vec, +} + +impl ParseTableEntry { + pub fn new() -> Self { + Self { + reusable: true, + actions: Vec::new(), + } + } +} + +impl ParseAction { + pub fn precedence(&self) -> i32 { + if let ParseAction::Reduce { precedence, .. 
} = self { + *precedence + } else { + 0 + } + } } From 261a7fd07347b20ad500b58ac3d1dbf96990da81 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 21 Dec 2018 15:02:48 -0800 Subject: [PATCH 074/208] Represent ParseItem with reference to Production Implement comparisons in a way that disregards past steps. --- src/build_tables/inline_variables.rs | 441 ----------------------- src/build_tables/item.rs | 315 ++++++++-------- src/build_tables/item_set_builder.rs | 66 ++-- src/build_tables/mod.rs | 120 +++---- src/generate.rs | 5 +- src/grammars.rs | 78 +++- src/prepare_grammar/mod.rs | 16 +- src/prepare_grammar/process_inlines.rs | 477 +++++++++++++++++++++++++ src/rules.rs | 4 +- 9 files changed, 803 insertions(+), 719 deletions(-) delete mode 100644 src/build_tables/inline_variables.rs create mode 100644 src/prepare_grammar/process_inlines.rs diff --git a/src/build_tables/inline_variables.rs b/src/build_tables/inline_variables.rs deleted file mode 100644 index affbe163..00000000 --- a/src/build_tables/inline_variables.rs +++ /dev/null @@ -1,441 +0,0 @@ -use super::item::ParseItem; -use crate::grammars::{Production, SyntaxGrammar}; -use std::collections::HashMap; - -pub(crate) struct InlinedProductionMap { - pub inlined_productions: Vec, - item_map: HashMap>, -} - -impl InlinedProductionMap { - pub fn new(grammar: &SyntaxGrammar) -> Self { - let mut result = Self { - inlined_productions: Vec::new(), - item_map: HashMap::new(), - }; - - let mut items_to_process = Vec::new(); - for (variable_index, variable) in grammar.variables.iter().enumerate() { - for production_index in 0..variable.productions.len() { - items_to_process.push(ParseItem::Normal { - variable_index: variable_index as u32, - production_index: production_index as u32, - step_index: 0, - }); - while !items_to_process.is_empty() { - let mut i = 0; - while i < items_to_process.len() { - let item = &items_to_process[i]; - if let Some(step) = item.step(grammar, &result) { - if 
grammar.variables_to_inline.contains(&step.symbol) { - let inlined_items = result - .inline(*item, grammar) - .into_iter() - .map(|production_index| ParseItem::Inlined { - variable_index: item.variable_index(), - production_index: *production_index, - step_index: item.step_index() as u32, - }) - .collect::>(); - items_to_process.splice(i..i + 1, inlined_items); - } else { - items_to_process[i] = item.successor(); - i += 1; - } - } else { - items_to_process.remove(i); - } - } - } - } - } - - result - } - - pub fn inlined_items<'a>( - &'a self, - item: ParseItem, - ) -> Option + 'a> { - self.item_map.get(&item).map(|production_indices| { - production_indices - .iter() - .cloned() - .map(move |production_index| ParseItem::Inlined { - variable_index: item.variable_index(), - production_index, - step_index: item.step_index() as u32, - }) - }) - } - - fn inline(&mut self, item: ParseItem, grammar: &SyntaxGrammar) -> &Vec { - let step_index = item.step_index(); - let mut productions_to_add = grammar.variables - [item.step(grammar, self).unwrap().symbol.index] - .productions - .clone(); - - let mut i = 0; - while i < productions_to_add.len() { - if let Some(first_symbol) = productions_to_add[i].first_symbol() { - if grammar.variables_to_inline.contains(&first_symbol) { - // Remove the production from the vector, replacing it with a placeholder. - let production = productions_to_add - .splice(i..i + 1, [Production::default()].iter().cloned()) - .next() - .unwrap(); - - // Replace the placeholder with the inlined productions. 
- productions_to_add.splice( - i..i + 1, - grammar.variables[first_symbol.index] - .productions - .iter() - .map(|p| { - let mut p = p.clone(); - p.steps.extend(production.steps[1..].iter().cloned()); - p - }), - ); - continue; - } - } - i += 1; - } - - let result = productions_to_add - .into_iter() - .map(|production_to_add| { - let mut inlined_production = item.production(grammar, &self).clone(); - let removed_step = inlined_production - .steps - .splice( - step_index..step_index + 1, - production_to_add.steps.iter().cloned(), - ) - .next() - .unwrap(); - let inserted_steps = &mut inlined_production.steps - [step_index..step_index + production_to_add.steps.len()]; - if let Some(alias) = removed_step.alias { - for inserted_step in inserted_steps.iter_mut() { - inserted_step.alias = Some(alias.clone()); - } - } - if let Some(last_inserted_step) = inserted_steps.last_mut() { - last_inserted_step.precedence = removed_step.precedence; - last_inserted_step.associativity = removed_step.associativity; - } - self.inlined_productions - .iter() - .position(|p| *p == inlined_production) - .unwrap_or({ - self.inlined_productions.push(inlined_production); - self.inlined_productions.len() - 1 - }) as u32 - }) - .collect(); - - self.item_map.entry(item).or_insert(result) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::grammars::{LexicalGrammar, ProductionStep, SyntaxVariable, VariableType}; - use crate::rules::{Alias, Associativity, Symbol}; - use std::borrow::Borrow; - - #[test] - fn test_basic_inlining() { - let grammar = SyntaxGrammar { - expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - variables_to_inline: vec![Symbol::non_terminal(1)], - variables: vec![ - SyntaxVariable { - name: "non-terminal-0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(10)), - 
ProductionStep::new(Symbol::non_terminal(1)), // inlined - ProductionStep::new(Symbol::terminal(11)), - ], - }], - }, - SyntaxVariable { - name: "non-terminal-1".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(12)), - ProductionStep::new(Symbol::terminal(13)), - ], - }, - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(14))], - }, - ], - }, - ], - }; - - let inline_map = InlinedProductionMap::new(&grammar); - - // Nothing to inline at step 0. - assert!(inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 0 - }) - .is_none()); - - // Inlining variable 1 yields two productions. - assert_eq!( - display_items( - inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 1 - }) - .unwrap(), - &grammar, - &inline_map - ), - vec![ - "non-terminal-0 → terminal-10 • terminal-12 terminal-13 terminal-11" - .to_string(), - "non-terminal-0 → terminal-10 • terminal-14 terminal-11".to_string(), - ] - ); - } - - #[test] - fn test_nested_inlining() { - let grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "non-terminal-0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(1)), // inlined - ProductionStep::new(Symbol::terminal(11)), - ProductionStep::new(Symbol::non_terminal(2)), // inlined - ProductionStep::new(Symbol::terminal(12)), - ], - }], - }, - SyntaxVariable { - name: "non-terminal-1".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(13))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(3)), // 
inlined - ProductionStep::new(Symbol::terminal(14)), - ], - }, - ], - }, - SyntaxVariable { - name: "non-terminal-2".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(15))], - }], - }, - SyntaxVariable { - name: "non-terminal-3".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(16))], - }], - }, - ], - variables_to_inline: vec![ - Symbol::non_terminal(1), - Symbol::non_terminal(2), - Symbol::non_terminal(3), - ], - expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let inline_map = InlinedProductionMap::new(&grammar); - - let items = inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 1, - }) - .unwrap() - .collect::>(); - - assert_eq!( - display_items(&items, &grammar, &inline_map), - vec![ - "non-terminal-0 → terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(), - "non-terminal-0 → terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string() - ] - ); - - let item = items[0].successor().successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map), - vec![ - "non-terminal-0 → terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(), - ] - ); - - assert_eq!( - display_items(inline_map.inlined_items(item).unwrap(), &grammar, &inline_map), - vec![ - "non-terminal-0 → terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(), - ] - ); - } - - #[test] - fn test_inlining_with_precedence_and_alias() { - let grammar = SyntaxGrammar { - variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], - variables: vec![ - SyntaxVariable { - name: "non-terminal-0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - 
dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(1)) // inlined - .with_prec(1, Some(Associativity::Left)), - ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(2)), // inlined - ], - }], - }, - SyntaxVariable { - name: "non-terminal-1".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) - .with_alias("inner_alias", true), - ProductionStep::new(Symbol::terminal(12)).with_prec(3, None), - ], - }], - }, - SyntaxVariable { - name: "non-terminal-2".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(13)) - .with_alias("outer_alias", true)], - }], - }, - ], - expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let inline_map = InlinedProductionMap::new(&grammar); - - let items = inline_map - .inlined_items(ParseItem::Normal { - variable_index: 0, - production_index: 0, - step_index: 0, - }) - .unwrap() - .collect::>(); - assert_eq!( - display_items(&items, &grammar, &inline_map)[0], - "non-terminal-0 → • terminal-11 terminal-12 terminal-10 non-terminal-2".to_string(), - ); - - // The first step in the inlined production retains its precedence and alias. - let item = items[0].successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 • terminal-12 terminal-10 non-terminal-2".to_string(), - ); - assert_eq!(item.precedence(&grammar, &inline_map), 2); - assert_eq!( - items[0].step(&grammar, &inline_map).unwrap().alias, - Some(Alias { - value: "inner_alias".to_string(), - is_named: true, - }) - ); - - // The final terminal of the inlined production inherits the precedence of - // the inlined step. 
- let item = item.successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 terminal-12 • terminal-10 non-terminal-2".to_string(), - ); - assert_eq!(item.precedence(&grammar, &inline_map), 1); - - let item = item.successor(); - assert_eq!( - display_items(&[item], &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 terminal-12 terminal-10 • non-terminal-2".to_string(), - ); - - // All steps of the inlined production inherit their alias from the - // inlined step. - let items = inline_map.inlined_items(item).unwrap().collect::>(); - assert_eq!( - display_items(&items, &grammar, &inline_map)[0], - "non-terminal-0 → terminal-11 terminal-12 terminal-10 • terminal-13".to_string(), - ); - assert_eq!( - items[0].step(&grammar, &inline_map).unwrap().alias, - Some(Alias { - value: "outer_alias".to_string(), - is_named: true, - }) - ) - } - - fn display_items( - items: impl IntoIterator>, - grammar: &SyntaxGrammar, - inline_map: &InlinedProductionMap, - ) -> Vec { - let lex = LexicalGrammar::default(); - items - .into_iter() - .map(|item| format!("{}", item.borrow().display_with(grammar, &lex, inline_map))) - .collect() - } -} diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 9208f602..49ab4f27 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -1,10 +1,12 @@ -use super::inline_variables::InlinedProductionMap; use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; -use crate::rules::{Associativity, Symbol, SymbolType}; +use crate::rules::Associativity; +use crate::rules::{Symbol, SymbolType}; use smallbitvec::SmallBitVec; use std::collections::{HashMap, BTreeMap}; use std::fmt; use std::hash::{Hash, Hasher}; +use std::u32; +use std::cmp::Ordering; lazy_static! 
{ static ref START_PRODUCTION: Production = Production { @@ -28,49 +30,26 @@ pub(crate) struct LookaheadSet { eof: bool, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) enum ParseItem { - Start { - step_index: u32, - }, - Normal { - variable_index: u32, - production_index: u32, - step_index: u32, - }, - Inlined { - variable_index: u32, - production_index: u32, - step_index: u32, - }, +#[derive(Clone, Copy, Debug)] +pub(crate) struct ParseItem<'a> { + pub variable_index: u32, + pub step_index: u32, + pub production: &'a Production, } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ParseItemSet { - pub entries: BTreeMap, +pub(crate) struct ParseItemSet<'a> { + pub entries: BTreeMap, LookaheadSet>, } -pub(crate) struct ParseItemDisplay<'a>( - &'a ParseItem, - &'a SyntaxGrammar, - &'a LexicalGrammar, - &'a InlinedProductionMap, -); - +pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem<'a>, &'a SyntaxGrammar, &'a LexicalGrammar); pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar); - pub(crate) struct ParseItemSetDisplay<'a>( - &'a ParseItemSet, + &'a ParseItemSet<'a>, &'a SyntaxGrammar, &'a LexicalGrammar, - &'a InlinedProductionMap, ); -struct ParseItemSetMapEntry(ParseItemSet, u64); -pub(crate) struct ParseItemSetMap { - map: HashMap -} - impl LookaheadSet { pub fn new() -> Self { Self { @@ -173,152 +152,79 @@ impl LookaheadSet { } } -impl ParseItem { +impl<'a> ParseItem<'a> { pub fn start() -> Self { - ParseItem::Start { step_index: 0 } - } - - pub fn is_kernel(&self) -> bool { - match self { - ParseItem::Start { .. } => true, - ParseItem::Normal { step_index, .. } | ParseItem::Inlined { step_index, .. 
} => { - *step_index > 0 - } + ParseItem { + variable_index: u32::MAX, + production: &START_PRODUCTION, + step_index: 0, } } - pub fn production<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlined_productions: &'a InlinedProductionMap, - ) -> &'a Production { - match self { - ParseItem::Start { .. } => &START_PRODUCTION, - ParseItem::Normal { - variable_index, - production_index, - .. - } => { - &grammar.variables[*variable_index as usize].productions[*production_index as usize] - } - ParseItem::Inlined { - production_index, .. - } => &inlined_productions.inlined_productions[*production_index as usize], + pub fn step(&self) -> Option<&'a ProductionStep> { + self.production.steps.get(self.step_index as usize) + } + + pub fn symbol(&self) -> Option { + self.step().map(|step| step.symbol) + } + + pub fn associativity(&self) -> Option { + self.prev_step().and_then(|step| step.associativity) + } + + pub fn precedence(&self) -> i32 { + self.prev_step().map_or(0, |step| step.precedence) + } + + pub fn prev_step(&self) -> Option<&'a ProductionStep> { + self.production.steps.get(self.step_index as usize - 1) + } + + pub fn is_done(&self) -> bool { + self.step_index as usize == self.production.steps.len() + } + + pub fn is_augmented(&self) -> bool { + self.variable_index == u32::MAX + } + + pub fn successor(&self) -> ParseItem<'a> { + ParseItem { + variable_index: self.variable_index, + production: self.production, + step_index: self.step_index + 1, } } - pub fn symbol( - &self, - grammar: &SyntaxGrammar, - inlined_productions: &InlinedProductionMap, - ) -> Option { - self.step(grammar, inlined_productions).map(|s| s.symbol) - } - - pub fn step<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlined_productions: &'a InlinedProductionMap, - ) -> Option<&'a ProductionStep> { - self.production(grammar, inlined_productions) - .steps - .get(self.step_index()) - } - - pub fn precedence<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlines: &'a InlinedProductionMap, - ) -> i32 { - 
self.production(grammar, inlines) - .steps - .get(self.step_index() - 1) - .map(|s| s.precedence) - .unwrap_or(0) - } - - pub fn associativity<'a>( - &self, - grammar: &'a SyntaxGrammar, - inlines: &'a InlinedProductionMap, - ) -> Option { - let production = self.production(grammar, inlines); - let step_index = self.step_index(); - if step_index == production.steps.len() { - production.steps.last().and_then(|s| s.associativity) - } else { - None - } - } - - pub fn variable_index(&self) -> u32 { - match self { - ParseItem::Start { .. } => panic!("Start item doesn't have a variable index"), - ParseItem::Normal { variable_index, .. } - | ParseItem::Inlined { variable_index, .. } => *variable_index, - } - } - - pub fn step_index(&self) -> usize { - match self { - ParseItem::Start { step_index } - | ParseItem::Normal { step_index, .. } - | ParseItem::Inlined { step_index, .. } => *step_index as usize, - } - } - - pub fn is_final(&self) -> bool { - if let ParseItem::Start { step_index: 1 } = self { - true - } else { - false - } - } - - fn step_index_mut(&mut self) -> &mut u32 { - match self { - ParseItem::Start { step_index } - | ParseItem::Normal { step_index, .. } - | ParseItem::Inlined { step_index, .. 
} => step_index, - } - } - - pub fn display_with<'a>( + pub fn display_with( &'a self, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - inlines: &'a InlinedProductionMap, ) -> ParseItemDisplay<'a> { - ParseItemDisplay(self, syntax_grammar, lexical_grammar, inlines) - } - - pub fn successor(&self) -> ParseItem { - let mut result = self.clone(); - *result.step_index_mut() += 1; - result + ParseItemDisplay(self, syntax_grammar, lexical_grammar) } } -impl ParseItemSet { - pub fn with<'a>(elements: impl IntoIterator) -> Self { +impl<'a> ParseItemSet<'a> { + pub fn with(elements: impl IntoIterator, LookaheadSet)>) -> Self { let mut result = Self::default(); for (item, lookaheads) in elements { - result.entries.insert(*item, lookaheads.clone()); + result.entries.insert(item, lookaheads); } result } - pub fn display_with<'a>( + pub fn display_with( &'a self, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - inlines: &'a InlinedProductionMap, ) -> ParseItemSetDisplay<'a> { - ParseItemSetDisplay(self, syntax_grammar, lexical_grammar, inlines) + ParseItemSetDisplay(self, syntax_grammar, lexical_grammar) } } -impl Default for ParseItemSet { +impl<'a> Default for ParseItemSet<'a> { fn default() -> Self { Self { entries: BTreeMap::new(), @@ -328,20 +234,18 @@ impl Default for ParseItemSet { impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - if let ParseItem::Start { .. 
} = &self.0 { + if self.0.is_augmented() { write!(f, "START →")?; } else { write!( f, "{} →", - &self.1.variables[self.0.variable_index() as usize].name + &self.1.variables[self.0.variable_index as usize].name )?; } - let step_index = self.0.step_index(); - let production = self.0.production(self.1, self.3); - for (i, step) in production.steps.iter().enumerate() { - if i == step_index { + for (i, step) in self.0.production.steps.iter().enumerate() { + if i == self.0.step_index as usize { write!(f, " •")?; } @@ -359,7 +263,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { } } - if production.steps.len() == step_index { + if self.0.is_done() { write!(f, " •")?; } @@ -398,7 +302,7 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> { writeln!( f, "{}\t{}", - item.display_with(self.1, self.2, self.3), + item.display_with(self.1, self.2), lookaheads.display_with(self.1, self.2) )?; } @@ -406,7 +310,94 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> { } } -impl Hash for ParseItemSet { +impl<'a> Hash for ParseItem<'a> { + fn hash(&self, hasher: &mut H) { + hasher.write_u32(self.variable_index); + hasher.write_u32(self.step_index); + hasher.write_i32(self.production.dynamic_precedence); + hasher.write_usize(self.production.steps.len()); + hasher.write_i32(self.precedence()); + self.associativity().hash(hasher); + for step in &self.production.steps[0..self.step_index as usize] { + step.alias.hash(hasher); + } + for step in &self.production.steps[self.step_index as usize..] 
{ + step.hash(hasher); + } + } +} + +impl<'a> PartialEq for ParseItem<'a> { + fn eq(&self, other: &Self) -> bool { + if self.variable_index != other.variable_index + || self.step_index != other.step_index + || self.production.dynamic_precedence != other.production.dynamic_precedence + || self.production.steps.len() != other.production.steps.len() + || self.precedence() != other.precedence() + || self.associativity() != other.associativity() + { + return false; + } + + for (i, step) in self.production.steps.iter().enumerate() { + if i < self.step_index as usize { + if step.alias != other.production.steps[i].alias { + return false; + } + } else { + if *step != other.production.steps[i] { + return false; + } + } + } + + return true; + } +} + +impl<'a> PartialOrd for ParseItem<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + if let Some(o) = self.variable_index.partial_cmp(&other.variable_index) { + return Some(o); + } + if let Some(o) = self.step_index.partial_cmp(&other.step_index) { + return Some(o); + } + if let Some(o) = self.production.dynamic_precedence.partial_cmp(&other.production.dynamic_precedence) { + return Some(o); + } + if let Some(o) = self.production.steps.len().partial_cmp(&other.production.steps.len()) { + return Some(o); + } + if let Some(o) = self.precedence().partial_cmp(&other.precedence()) { + return Some(o); + } + if let Some(o) = self.associativity().partial_cmp(&other.associativity()) { + return Some(o); + } + for (i, step) in self.production.steps.iter().enumerate() { + let cmp = if i < self.step_index as usize { + step.alias.partial_cmp(&other.production.steps[i].alias) + } else { + step.partial_cmp(&other.production.steps[i]) + }; + if let Some(o) = cmp { + return Some(o); + } + } + return None; + } +} + +impl<'a> Ord for ParseItem<'a> { + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +impl<'a> Eq for ParseItem<'a> {} + +impl<'a> Hash for ParseItemSet<'a> { fn hash(&self, 
hasher: &mut H) { hasher.write_usize(self.entries.len()); for (item, lookaheads) in self.entries.iter() { diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 530c1f25..52ee0a45 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -1,12 +1,11 @@ -use super::inline_variables::InlinedProductionMap; use super::item::{LookaheadSet, ParseItem, ParseItemSet}; -use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::rules::Symbol; use std::collections::{HashMap, HashSet}; #[derive(Clone, Debug, PartialEq, Eq)] -struct TransitiveClosureAddition { - item: ParseItem, +struct TransitiveClosureAddition<'a> { + item: ParseItem<'a>, info: FollowSetInfo, } @@ -16,11 +15,10 @@ struct FollowSetInfo { propagates_lookaheads: bool, } -pub(crate) struct ParseItemSetBuilder { +pub(crate) struct ParseItemSetBuilder<'a> { first_sets: HashMap, last_sets: HashMap, - transitive_closure_additions: Vec>, - pub inlines: InlinedProductionMap, + transitive_closure_additions: Vec>>, } fn find_or_push(vector: &mut Vec, value: T) { @@ -29,13 +27,16 @@ fn find_or_push(vector: &mut Vec, value: T) { } } -impl ParseItemSetBuilder { - pub fn new(syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar) -> Self { +impl<'a> ParseItemSetBuilder<'a> { + pub fn new( + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, + ) -> Self { let mut result = Self { first_sets: HashMap::new(), last_sets: HashMap::new(), transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()], - inlines: InlinedProductionMap::new(syntax_grammar), }; // For each grammar symbol, populate the FIRST and LAST sets: the set of @@ -193,22 +194,28 @@ impl ParseItemSetBuilder { for (variable_index, follow_set_info) in follow_set_info_by_non_terminal { let variable = 
&syntax_grammar.variables[variable_index]; let non_terminal = Symbol::non_terminal(variable_index); + let variable_index = variable_index as u32; if syntax_grammar.variables_to_inline.contains(&non_terminal) { continue; } - for production_index in 0..variable.productions.len() { - let item = ParseItem::Normal { - variable_index: variable_index as u32, - production_index: production_index as u32, + for (production_index, production) in variable.productions.iter().enumerate() { + let item = ParseItem { + variable_index, + production, step_index: 0, }; - if let Some(inlined_items) = result.inlines.inlined_items(item) { - for inlined_item in inlined_items { + // let step_id = item.as_step_id(syntax_grammar, inlines); + if let Some(inlined_productions) = inlines.inlined_productions(item.production, item.step_index) { + for production in inlined_productions { find_or_push( additions_for_non_terminal, TransitiveClosureAddition { - item: inlined_item, + item: ParseItem { + variable_index, + production, + step_index: item.step_index, + }, info: follow_set_info.clone(), }, ); @@ -231,14 +238,19 @@ impl ParseItemSetBuilder { pub(crate) fn transitive_closure( &mut self, - item_set: &ParseItemSet, - grammar: &SyntaxGrammar, - ) -> ParseItemSet { + item_set: &ParseItemSet<'a>, + grammar: &'a SyntaxGrammar, + inlines: &'a InlinedProductionMap, + ) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { - if let Some(items) = self.inlines.inlined_items(*item) { - for item in items { - self.add_item(&mut result, item, lookaheads, grammar); + if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) { + for production in productions { + self.add_item(&mut result, ParseItem { + variable_index: item.variable_index, + production, + step_index: item.step_index, + }, lookaheads, grammar); } } else { self.add_item(&mut result, *item, lookaheads, grammar); @@ -253,14 +265,14 @@ impl ParseItemSetBuilder { 
fn add_item( &self, - set: &mut ParseItemSet, - item: ParseItem, + set: &mut ParseItemSet<'a>, + item: ParseItem<'a>, lookaheads: &LookaheadSet, grammar: &SyntaxGrammar, ) { - if let Some(step) = item.step(grammar, &self.inlines) { + if let Some(step) = item.step() { if step.symbol.is_non_terminal() { - let next_step = item.successor().step(grammar, &self.inlines); + let next_step = item.successor().step(); // Determine which tokens can follow this non-terminal. let following_tokens = if let Some(next_step) = next_step { diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 091c5486..27951453 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,14 +1,14 @@ -mod inline_variables; mod item; mod item_set_builder; use self::item::{LookaheadSet, ParseItem, ParseItemSet}; use self::item_set_builder::ParseItemSetBuilder; use crate::error::{Error, Result}; -use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; use crate::rules::{AliasMap, Associativity, Symbol, SymbolType}; -use crate::tables::ParseTableEntry; -use crate::tables::{AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable}; +use crate::tables::{ + AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +}; use core::ops::Range; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet, VecDeque}; @@ -30,12 +30,13 @@ struct ParseStateQueueEntry { } struct ParseTableBuilder<'a> { - item_set_builder: ParseItemSetBuilder, + item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, simple_aliases: &'a AliasMap, - state_ids_by_item_set: HashMap, - item_sets_by_state_id: Vec, + state_ids_by_item_set: HashMap, ParseStateId>, + item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, parse_table: ParseTable, 
} @@ -46,16 +47,17 @@ impl<'a> ParseTableBuilder<'a> { self.parse_table.alias_sequences.push(Vec::new()); // Ensure that the error state has index 0. - let error_state_id = self.add_parse_state( - &Vec::new(), - &Vec::new(), - ParseItemSet::default(), - ); + let error_state_id = + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); self.add_parse_state( &Vec::new(), &Vec::new(), - ParseItemSet::with(&[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]), + ParseItemSet::with( + [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))] + .iter() + .cloned(), + ), ); self.process_part_state_queue()?; @@ -68,7 +70,7 @@ impl<'a> ParseTableBuilder<'a> { &mut self, preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &AuxiliarySymbolSequence, - item_set: ParseItemSet, + item_set: ParseItemSet<'a>, ) -> ParseStateId { match self.state_ids_by_item_set.entry(item_set) { Entry::Occupied(o) => { @@ -99,16 +101,14 @@ impl<'a> ParseTableBuilder<'a> { println!( "ITEM SET {}:\n{}", entry.state_id, - self.item_sets_by_state_id[entry.state_id].display_with( - &self.syntax_grammar, - &self.lexical_grammar, - &self.item_set_builder.inlines - ) + self.item_sets_by_state_id[entry.state_id] + .display_with(&self.syntax_grammar, &self.lexical_grammar,) ); let item_set = self.item_set_builder.transitive_closure( &self.item_sets_by_state_id[entry.state_id], self.syntax_grammar, + self.inlines, ); // println!("TRANSITIVE CLOSURE:"); @@ -131,7 +131,7 @@ impl<'a> ParseTableBuilder<'a> { &mut self, mut preceding_symbols: SymbolSequence, mut preceding_auxiliary_symbols: Vec, - item_set: ParseItemSet, + item_set: ParseItemSet<'a>, state_id: ParseStateId, ) -> Result<()> { let mut terminal_successors = HashMap::new(); @@ -139,9 +139,7 @@ impl<'a> ParseTableBuilder<'a> { let mut lookaheads_with_conflicts = HashSet::new(); for (item, lookaheads) in &item_set.entries { - if let Some(next_symbol) = - item.symbol(self.syntax_grammar, 
&self.item_set_builder.inlines) - { + if let Some(next_symbol) = item.symbol() { let successor = item.successor(); if next_symbol.is_non_terminal() { // Keep track of where auxiliary non-terminals (repeat symbols) are @@ -169,17 +167,15 @@ impl<'a> ParseTableBuilder<'a> { .insert_all(lookaheads); } } else { - let action = if item.is_final() { + let action = if item.is_augmented() { ParseAction::Accept } else { - let production = - item.production(&self.syntax_grammar, &self.item_set_builder.inlines); ParseAction::Reduce { - symbol: Symbol::non_terminal(item.variable_index() as usize), - child_count: item.step_index(), - precedence: production.last_precedence(), - associativity: production.last_associativity(), - dynamic_precedence: production.dynamic_precedence, + symbol: Symbol::non_terminal(item.variable_index as usize), + child_count: item.step_index as usize, + precedence: item.precedence(), + associativity: item.associativity(), + dynamic_precedence: item.production.dynamic_precedence, alias_sequence_id: self.get_alias_sequence_id(item), } }; @@ -280,17 +276,15 @@ impl<'a> ParseTableBuilder<'a> { let mut shift_precedence: Option> = None; let mut conflicting_items = HashSet::new(); for (item, lookaheads) in &item_set.entries { - let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines); - let step_index = item.step_index(); - if let Some(step) = production.steps.get(step_index) { - if step_index > 0 { + if let Some(step) = item.step() { + if item.step_index > 0 { if self .item_set_builder .first_set(&step.symbol) .contains(&conflicting_lookahead) { conflicting_items.insert(item); - let precedence = production.steps[step_index - 1].precedence; + let precedence = item.precedence(); if let Some(range) = &mut shift_precedence { if precedence < range.start { range.start = precedence; @@ -316,11 +310,11 @@ impl<'a> ParseTableBuilder<'a> { // by leaving it in the parse table, but marking the SHIFT action with // an `is_repetition` flag. 
let conflicting_variable_index = - conflicting_items.iter().next().unwrap().variable_index(); + conflicting_items.iter().next().unwrap().variable_index; if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() { if conflicting_items .iter() - .all(|item| item.variable_index() == conflicting_variable_index) + .all(|item| item.variable_index == conflicting_variable_index) { *is_repetition = true; return Ok(()); @@ -340,10 +334,7 @@ impl<'a> ParseTableBuilder<'a> { && shift_precedence.start < reduce_precedence) { entry.actions.pop(); - conflicting_items.retain(|item| { - item.step(&self.syntax_grammar, &self.item_set_builder.inlines) - .is_none() - }); + conflicting_items.retain(|item| item.is_done()); } // If the SHIFT and REDUCE actions have the same predence, consider // the REDUCE actions' associativity. @@ -367,10 +358,7 @@ impl<'a> ParseTableBuilder<'a> { match (has_left, has_non, has_right) { (true, false, false) => { entry.actions.pop(); - conflicting_items.retain(|item| { - item.step(&self.syntax_grammar, &self.item_set_builder.inlines) - .is_none() - }); + conflicting_items.retain(|item| item.is_done()); } (false, false, true) => { entry.actions.drain(0..entry.actions.len() - 1); @@ -392,7 +380,7 @@ impl<'a> ParseTableBuilder<'a> { // Determine the set of parent symbols involved in this conflict. 
let mut actual_conflict = Vec::new(); for item in &conflicting_items { - let symbol = Symbol::non_terminal(item.variable_index() as usize); + let symbol = Symbol::non_terminal(item.variable_index as usize); if self.syntax_grammar.variables[symbol.index].is_auxiliary() { actual_conflict.extend( preceding_auxiliary_symbols @@ -441,7 +429,7 @@ impl<'a> ParseTableBuilder<'a> { for preceding_symbol in preceding_symbols .iter() - .take(preceding_symbols.len() - item.step_index()) + .take(preceding_symbols.len() - item.step_index as usize) { write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap(); } @@ -449,17 +437,12 @@ impl<'a> ParseTableBuilder<'a> { write!( &mut msg, " ({}", - &self.syntax_grammar.variables[item.variable_index() as usize].name + &self.syntax_grammar.variables[item.variable_index as usize].name ) .unwrap(); - for (j, step) in item - .production(&self.syntax_grammar, &self.item_set_builder.inlines) - .steps - .iter() - .enumerate() - { - if j == item.step_index() { + for (j, step) in item.production.steps.iter().enumerate() { + if j as u32 == item.step_index { write!(&mut msg, " •").unwrap(); } write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap(); @@ -467,10 +450,7 @@ impl<'a> ParseTableBuilder<'a> { write!(&mut msg, ")").unwrap(); - if item - .step(&self.syntax_grammar, &self.item_set_builder.inlines) - .is_none() - { + if item.is_done() { write!( &mut msg, " • {}", @@ -479,9 +459,8 @@ impl<'a> ParseTableBuilder<'a> { .unwrap(); } - let precedence = item.precedence(&self.syntax_grammar, &self.item_set_builder.inlines); - let associativity = - item.associativity(&self.syntax_grammar, &self.item_set_builder.inlines); + let precedence = item.precedence(); + let associativity = item.associativity(); if precedence != 0 || associativity.is_some() { write!( &mut msg, @@ -506,8 +485,7 @@ impl<'a> ParseTableBuilder<'a> { .entries .keys() .filter_map(|item| { - if item.symbol(&self.syntax_grammar, &self.item_set_builder.inlines) == 
Some(symbol) - { + if item.symbol() == Some(symbol) { None } else { None @@ -554,8 +532,12 @@ impl<'a> ParseTableBuilder<'a> { } fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { - let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines); - let alias_sequence = production.steps.iter().map(|s| s.alias.clone()).collect(); + let alias_sequence = item + .production + .steps + .iter() + .map(|s| s.alias.clone()) + .collect(); if let Some(index) = self .parse_table .alias_sequences @@ -592,12 +574,14 @@ pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, + inlines: &InlinedProductionMap, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { ParseTableBuilder { syntax_grammar, lexical_grammar, simple_aliases, - item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar), + inlines, + item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), parse_state_queue: VecDeque::new(), diff --git a/src/generate.rs b/src/generate.rs index dc3d5176..cdbbea4f 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -6,11 +6,12 @@ use crate::render::render_c_code; pub fn generate_parser_for_grammar(input: &str) -> Result { let input_grammar = parse_grammar(input)?; - let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, - &simple_aliases + &simple_aliases, + &inlines )?; let c_code = render_c_code( &input_grammar.name, diff --git a/src/grammars.rs b/src/grammars.rs index 7512ec03..b751e4e4 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -1,12 +1,13 @@ -use 
crate::rules::{Associativity, Alias, Rule, Symbol}; use crate::nfa::Nfa; +use crate::rules::{Alias, Associativity, Rule, Symbol}; +use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum VariableType { Hidden, Auxiliary, Anonymous, - Named + Named, } // Input grammar @@ -46,12 +47,12 @@ pub(crate) struct LexicalGrammar { // Extracted syntax grammar -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct ProductionStep { - pub symbol: Symbol, - pub precedence: i32, - pub associativity: Option, - pub alias: Option, + pub symbol: Symbol, + pub precedence: i32, + pub associativity: Option, + pub alias: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -60,6 +61,11 @@ pub(crate) struct Production { pub dynamic_precedence: i32, } +pub(crate) struct InlinedProductionMap { + pub productions: Vec, + pub production_map: HashMap<(*const Production, u32), Vec>, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct SyntaxVariable { pub name: String, @@ -86,7 +92,12 @@ pub(crate) struct SyntaxGrammar { impl ProductionStep { pub(crate) fn new(symbol: Symbol) -> Self { - Self { symbol, precedence: 0, associativity: None, alias: None } + Self { + symbol, + precedence: 0, + associativity: None, + alias: None, + } } pub(crate) fn with_prec(self, precedence: i32, associativity: Option) -> Self { @@ -103,7 +114,10 @@ impl ProductionStep { symbol: self.symbol, precedence: self.precedence, associativity: self.associativity, - alias: Some(Alias { value: value.to_string(), is_named }), + alias: Some(Alias { + value: value.to_string(), + is_named, + }), } } } @@ -124,25 +138,44 @@ impl Production { impl Default for Production { fn default() -> Self { - Production { dynamic_precedence: 0, steps: Vec::new() } + Production { + dynamic_precedence: 0, + steps: Vec::new(), + } } } impl Variable { pub fn named(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: 
VariableType::Named, rule } + Self { + name: name.to_string(), + kind: VariableType::Named, + rule, + } } pub fn auxiliary(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Auxiliary, rule } + Self { + name: name.to_string(), + kind: VariableType::Auxiliary, + rule, + } } pub fn hidden(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Hidden, rule } + Self { + name: name.to_string(), + kind: VariableType::Hidden, + rule, + } } pub fn anonymous(name: &str, rule: Rule) -> Self { - Self { name: name.to_string(), kind: VariableType::Anonymous, rule } + Self { + name: name.to_string(), + kind: VariableType::Anonymous, + rule, + } } } @@ -151,3 +184,20 @@ impl SyntaxVariable { self.kind == VariableType::Auxiliary } } + +impl InlinedProductionMap { + pub fn inlined_productions<'a>( + &'a self, + production: &Production, + step_index: u32, + ) -> Option + 'a> { + self.production_map + .get(&(production as *const Production, step_index)) + .map(|production_indices| { + production_indices + .iter() + .cloned() + .map(move |index| &self.productions[index]) + }) + } +} diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index 22435fca..f325383b 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -4,6 +4,7 @@ mod extract_simple_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; +mod process_inlines; use self::expand_repeats::expand_repeats; use self::expand_tokens::expand_tokens; @@ -11,8 +12,11 @@ use self::extract_simple_aliases::extract_simple_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; +use self::process_inlines::process_inlines; use crate::error::Result; -use crate::grammars::{ExternalToken, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable}; +use crate::grammars::{ + ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, 
SyntaxGrammar, Variable, +}; use crate::rules::{AliasMap, Rule, Symbol}; pub(self) struct IntermediateGrammar { @@ -36,12 +40,18 @@ pub(self) struct ExtractedLexicalGrammar { pub(crate) fn prepare_grammar( input_grammar: &InputGrammar, -) -> Result<(SyntaxGrammar, LexicalGrammar, AliasMap)> { +) -> Result<( + SyntaxGrammar, + LexicalGrammar, + InlinedProductionMap, + AliasMap, +)> { let interned_grammar = intern_symbols(input_grammar)?; let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - Ok((syntax_grammar, lexical_grammar, simple_aliases)) + let inlines = process_inlines(&syntax_grammar); + Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) } diff --git a/src/prepare_grammar/process_inlines.rs b/src/prepare_grammar/process_inlines.rs new file mode 100644 index 00000000..0d7f6827 --- /dev/null +++ b/src/prepare_grammar/process_inlines.rs @@ -0,0 +1,477 @@ +use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar}; +use std::collections::HashMap; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct ProductionStepId { + variable_index: Option, + production_index: usize, + step_index: usize, +} + +struct InlinedProductionMapBuilder { + production_indices_by_step_id: HashMap>, + productions: Vec, +} + +impl ProductionStepId { + pub fn successor(&self) -> Self { + Self { + variable_index: self.variable_index, + production_index: self.production_index, + step_index: self.step_index + 1, + } + } +} + +fn production_for_id<'a>( + map: &'a InlinedProductionMapBuilder, + id: ProductionStepId, + grammar: &'a SyntaxGrammar, +) -> &'a Production { + if let Some(variable_index) = id.variable_index { + 
&grammar.variables[variable_index].productions[id.production_index] + } else { + &map.productions[id.production_index] + } +} + +fn production_step_for_id<'a>( + map: &'a InlinedProductionMapBuilder, + id: ProductionStepId, + grammar: &'a SyntaxGrammar, +) -> Option<&'a ProductionStep> { + production_for_id(map, id, grammar).steps.get(id.step_index) +} + +fn inline<'a>( + map: &'a mut InlinedProductionMapBuilder, + step_id: ProductionStepId, + grammar: &'a SyntaxGrammar, +) -> &'a Vec { + let step = production_step_for_id(map, step_id, grammar).unwrap(); + let mut productions_to_add = grammar.variables[step.symbol.index].productions.clone(); + + let mut i = 0; + while i < productions_to_add.len() { + if let Some(first_symbol) = productions_to_add[i].first_symbol() { + if grammar.variables_to_inline.contains(&first_symbol) { + // Remove the production from the vector, replacing it with a placeholder. + let production = productions_to_add + .splice(i..i + 1, [Production::default()].iter().cloned()) + .next() + .unwrap(); + + // Replace the placeholder with the inlined productions. 
+ productions_to_add.splice( + i..i + 1, + grammar.variables[first_symbol.index] + .productions + .iter() + .map(|p| { + let mut p = p.clone(); + p.steps.extend(production.steps[1..].iter().cloned()); + p + }), + ); + continue; + } + } + i += 1; + } + + let result = productions_to_add + .into_iter() + .map(|production_to_add| { + let mut inlined_production = production_for_id(&map, step_id, grammar).clone(); + let removed_step = inlined_production + .steps + .splice( + step_id.step_index..step_id.step_index + 1, + production_to_add.steps.iter().cloned(), + ) + .next() + .unwrap(); + let inserted_steps = &mut inlined_production.steps + [step_id.step_index..step_id.step_index + production_to_add.steps.len()]; + if let Some(alias) = removed_step.alias { + for inserted_step in inserted_steps.iter_mut() { + inserted_step.alias = Some(alias.clone()); + } + } + if let Some(last_inserted_step) = inserted_steps.last_mut() { + last_inserted_step.precedence = removed_step.precedence; + last_inserted_step.associativity = removed_step.associativity; + } + map.productions + .iter() + .position(|p| *p == inlined_production) + .unwrap_or({ + map.productions.push(inlined_production); + map.productions.len() - 1 + }) + }) + .collect(); + + map.production_indices_by_step_id + .entry(step_id) + .or_insert(result) +} + +pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { + let mut result = InlinedProductionMapBuilder { + productions: Vec::new(), + production_indices_by_step_id: HashMap::new(), + }; + + let mut step_ids_to_process = Vec::new(); + for (variable_index, variable) in grammar.variables.iter().enumerate() { + for production_index in 0..variable.productions.len() { + step_ids_to_process.push(ProductionStepId { + variable_index: Some(variable_index), + production_index, + step_index: 0, + }); + while !step_ids_to_process.is_empty() { + let mut i = 0; + while i < step_ids_to_process.len() { + let step_id = step_ids_to_process[i]; + if let Some(step) 
= production_step_for_id(&result, step_id, grammar) { + if grammar.variables_to_inline.contains(&step.symbol) { + let inlined_step_ids = inline(&mut result, step_id, grammar) + .into_iter() + .cloned() + .map(|production_index| ProductionStepId { + variable_index: None, + production_index, + step_index: step_id.step_index, + }) + .collect::>(); + step_ids_to_process.splice(i..i + 1, inlined_step_ids); + } else { + step_ids_to_process[i] = step_id.successor(); + i += 1; + } + } else { + step_ids_to_process.remove(i); + } + } + } + } + } + + // result + let productions = result.productions; + let production_indices_by_step_id = result.production_indices_by_step_id; + + let production_map = production_indices_by_step_id + .into_iter() + .map(|(step_id, production_indices)| { + let production = if let Some(variable_index) = step_id.variable_index { + &grammar.variables[variable_index].productions[step_id.production_index] + } else { + &productions[step_id.production_index] + } as *const Production; + ((production, step_id.step_index as u32), production_indices) + }) + .collect(); + + InlinedProductionMap { productions, production_map } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::{ProductionStep, SyntaxVariable, VariableType}; + use crate::rules::{Associativity, Symbol}; + + #[test] + fn test_basic_inlining() { + let grammar = SyntaxGrammar { + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + variables_to_inline: vec![Symbol::non_terminal(1)], + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: 
vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(12)), + ProductionStep::new(Symbol::terminal(13)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(14))], + }, + ], + }, + ], + }; + let inline_map = process_inlines(&grammar); + + // Nothing to inline at step 0. + assert!(inline_map + .inlined_productions(&grammar.variables[0].productions[0], 0) + .is_none()); + + // Inlining variable 1 yields two productions. + assert_eq!( + inline_map + .inlined_productions(&grammar.variables[0].productions[0], 1) + .unwrap() + .cloned() + .collect::>(), + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(12)), + ProductionStep::new(Symbol::terminal(13)), + ProductionStep::new(Symbol::terminal(11)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(14)), + ProductionStep::new(Symbol::terminal(11)), + ], + }, + ] + ); + } + + #[test] + fn test_nested_inlining() { + let grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(1)), // inlined + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), // inlined + ProductionStep::new(Symbol::terminal(12)), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(13))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(3)), // inlined + 
ProductionStep::new(Symbol::terminal(14)), + ], + }, + ], + }, + SyntaxVariable { + name: "non-terminal-2".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(15))], + }], + }, + SyntaxVariable { + name: "non-terminal-3".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(16))], + }], + }, + ], + variables_to_inline: vec![ + Symbol::non_terminal(1), + Symbol::non_terminal(2), + Symbol::non_terminal(3), + ], + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + let inline_map = process_inlines(&grammar); + + let productions: Vec<&Production> = inline_map + .inlined_productions(&grammar.variables[0].productions[0], 1) + .unwrap() + .collect(); + + assert_eq!( + productions.iter().cloned().cloned().collect::>(), + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(13)), + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::terminal(12)), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(16)), + ProductionStep::new(Symbol::terminal(14)), + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::terminal(12)), + ], + }, + ] + ); + + assert_eq!( + inline_map + .inlined_productions(productions[0], 3) + .unwrap() + .cloned() + .collect::>(), + vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::terminal(13)), + ProductionStep::new(Symbol::terminal(11)), + ProductionStep::new(Symbol::terminal(15)), + 
ProductionStep::new(Symbol::terminal(12)), + ], + },] + ); + } + + #[test] + fn test_inlining_with_precedence_and_alias() { + let grammar = SyntaxGrammar { + variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], + variables: vec![ + SyntaxVariable { + name: "non-terminal-0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // inlined + ProductionStep::new(Symbol::non_terminal(1)) + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + // inlined + ProductionStep::new(Symbol::non_terminal(2)) + .with_alias("outer_alias", true), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-1".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + ProductionStep::new(Symbol::terminal(12)).with_prec(3, None), + ], + }], + }, + SyntaxVariable { + name: "non-terminal-2".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(13))], + }], + }, + ], + expected_conflicts: Vec::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let inline_map = process_inlines(&grammar); + + let productions: Vec<_> = inline_map + .inlined_productions(&grammar.variables[0].productions[0], 0) + .unwrap() + .collect(); + + assert_eq!( + productions.iter().cloned().cloned().collect::>(), + vec![Production { + dynamic_precedence: 0, + steps: vec![ + // The first step in the inlined production retains its precedence + // and alias. + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + // The final step of the inlined production inherits the precedence of + // the inlined step. 
+ ProductionStep::new(Symbol::terminal(12)) + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + ProductionStep::new(Symbol::non_terminal(2)) + .with_alias("outer_alias", true), + ] + }], + ); + + assert_eq!( + inline_map + .inlined_productions(productions[0], 3) + .unwrap() + .cloned() + .collect::>(), + vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(11)) + .with_prec(2, None) + .with_alias("inner_alias", true), + ProductionStep::new(Symbol::terminal(12)) + .with_prec(1, Some(Associativity::Left)), + ProductionStep::new(Symbol::terminal(10)), + // All steps of the inlined production inherit their alias from the + // inlined step. + ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true), + ] + }], + ); + } +} diff --git a/src/rules.rs b/src/rules.rs index 34f4c8b9..3bfd5181 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -8,13 +8,13 @@ pub(crate) enum SymbolType { End, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum Associativity { Left, Right, } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct Alias { pub value: String, pub is_named: bool, From 99ecf29e4b4bb394b17f9818ce31f5da781f7575 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 23 Dec 2018 10:15:23 -0800 Subject: [PATCH 075/208] Fix typo causing infinite recursion in expand_regex --- src/prepare_grammar/expand_tokens.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 37f75e5a..5ee9861f 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -217,7 +217,7 @@ fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) } RepetitionKind::Range(RepetitionRange::AtLeast(min)) => { 
if expand_zero_or_more(&repetition.ast, nfa, next_state_id, is_sep)? { - expand_count(ast, min, nfa, next_state_id, is_sep) + expand_count(&repetition.ast, min, nfa, next_state_id, is_sep) } else { Ok(false) } From 5258ee2e6ad3f202e43f98a093c82da1143a27fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 23 Dec 2018 10:16:03 -0800 Subject: [PATCH 076/208] Implement more C code generation --- src/build_tables/item.rs | 60 +- src/build_tables/item_set_builder.rs | 27 +- src/build_tables/lex_table_builder.rs | 24 + src/build_tables/mod.rs | 61 ++- src/render/mod.rs | 761 ++++++++++++++++++++++++-- src/tables.rs | 12 +- 6 files changed, 840 insertions(+), 105 deletions(-) create mode 100644 src/build_tables/lex_table_builder.rs diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 49ab4f27..28723d24 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -2,7 +2,7 @@ use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar} use crate::rules::Associativity; use crate::rules::{Symbol, SymbolType}; use smallbitvec::SmallBitVec; -use std::collections::{HashMap, BTreeMap}; +use std::collections::BTreeMap; use std::fmt; use std::hash::{Hash, Hasher}; use std::u32; @@ -178,7 +178,11 @@ impl<'a> ParseItem<'a> { } pub fn prev_step(&self) -> Option<&'a ProductionStep> { - self.production.steps.get(self.step_index as usize - 1) + if self.step_index > 0 { + Some(&self.production.steps[self.step_index as usize - 1]) + } else { + None + } } pub fn is_done(&self) -> bool { @@ -355,43 +359,49 @@ impl<'a> PartialEq for ParseItem<'a> { } } -impl<'a> PartialOrd for ParseItem<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - if let Some(o) = self.variable_index.partial_cmp(&other.variable_index) { - return Some(o); +impl<'a> Ord for ParseItem<'a> { + fn cmp(&self, other: &Self) -> Ordering { + let o = self.variable_index.cmp(&other.variable_index); + if o != Ordering::Equal { + return o; } - if let Some(o) = 
self.step_index.partial_cmp(&other.step_index) { - return Some(o); + let o = self.step_index.cmp(&other.step_index); + if o != Ordering::Equal { + return o; } - if let Some(o) = self.production.dynamic_precedence.partial_cmp(&other.production.dynamic_precedence) { - return Some(o); + let o = self.production.dynamic_precedence.cmp(&other.production.dynamic_precedence); + if o != Ordering::Equal { + return o; } - if let Some(o) = self.production.steps.len().partial_cmp(&other.production.steps.len()) { - return Some(o); + let o = self.production.steps.len().cmp(&other.production.steps.len()); + if o != Ordering::Equal { + return o; } - if let Some(o) = self.precedence().partial_cmp(&other.precedence()) { - return Some(o); + let o = self.precedence().cmp(&other.precedence()); + if o != Ordering::Equal { + return o; } - if let Some(o) = self.associativity().partial_cmp(&other.associativity()) { - return Some(o); + let o = self.associativity().cmp(&other.associativity()); + if o != Ordering::Equal { + return o; } for (i, step) in self.production.steps.iter().enumerate() { - let cmp = if i < self.step_index as usize { - step.alias.partial_cmp(&other.production.steps[i].alias) + let o = if i < self.step_index as usize { + step.alias.cmp(&other.production.steps[i].alias) } else { - step.partial_cmp(&other.production.steps[i]) + step.cmp(&other.production.steps[i]) }; - if let Some(o) = cmp { - return Some(o); + if o != Ordering::Equal { + return o; } } - return None; + return Ordering::Equal; } } -impl<'a> Ord for ParseItem<'a> { - fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other).unwrap_or(Ordering::Equal) +impl<'a> PartialOrd for ParseItem<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) } } diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 52ee0a45..d7883988 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -198,15 +198,16 @@ 
impl<'a> ParseItemSetBuilder<'a> { if syntax_grammar.variables_to_inline.contains(&non_terminal) { continue; } - for (production_index, production) in variable.productions.iter().enumerate() { + for production in &variable.productions { let item = ParseItem { variable_index, production, step_index: 0, }; - // let step_id = item.as_step_id(syntax_grammar, inlines); - if let Some(inlined_productions) = inlines.inlined_productions(item.production, item.step_index) { + if let Some(inlined_productions) = + inlines.inlined_productions(item.production, item.step_index) + { for production in inlined_productions { find_or_push( additions_for_non_terminal, @@ -244,16 +245,21 @@ impl<'a> ParseItemSetBuilder<'a> { ) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { - if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) { + if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) + { for production in productions { - self.add_item(&mut result, ParseItem { - variable_index: item.variable_index, - production, - step_index: item.step_index, - }, lookaheads, grammar); + self.add_item( + &mut result, + ParseItem { + variable_index: item.variable_index, + production, + step_index: item.step_index, + }, + lookaheads, + ); } } else { - self.add_item(&mut result, *item, lookaheads, grammar); + self.add_item(&mut result, *item, lookaheads); } } result @@ -268,7 +274,6 @@ impl<'a> ParseItemSetBuilder<'a> { set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &LookaheadSet, - grammar: &SyntaxGrammar, ) { if let Some(step) = item.step() { if step.symbol.is_non_terminal() { diff --git a/src/build_tables/lex_table_builder.rs b/src/build_tables/lex_table_builder.rs new file mode 100644 index 00000000..86d1578b --- /dev/null +++ b/src/build_tables/lex_table_builder.rs @@ -0,0 +1,24 @@ +use crate::rules::Symbol; +use crate::tables::LexTable; +use 
crate::grammars::{SyntaxGrammar, LexicalGrammar}; + +pub(crate) struct LexTableBuilder<'a> { + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + table: LexTable, +} + +impl<'a> LexTableBuilder<'a> { + pub fn new( + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + ) -> Self { + Self { + syntax_grammar, lexical_grammar, table: LexTable::default() + } + } + + pub fn build(self) -> (LexTable, LexTable, Option) { + (LexTable::default(), LexTable::default(), None) + } +} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 27951453..fc17ce7f 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,10 +1,13 @@ mod item; mod item_set_builder; +mod lex_table_builder; use self::item::{LookaheadSet, ParseItem, ParseItemSet}; use self::item_set_builder::ParseItemSetBuilder; +use self::lex_table_builder::LexTableBuilder; use crate::error::{Error, Result}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::rules::Alias; use crate::rules::{AliasMap, Associativity, Symbol, SymbolType}; use crate::tables::{ AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, @@ -43,7 +46,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result<(ParseTable, LexTable, LexTable, Option)> { - // Ensure that the empty rename sequence has index 0. + // Ensure that the empty alias sequence has index 0. self.parse_table.alias_sequences.push(Vec::new()); // Ensure that the error state has index 0. 
@@ -61,9 +64,18 @@ impl<'a> ParseTableBuilder<'a> { ); self.process_part_state_queue()?; + + let lex_table_builder = LexTableBuilder::new(self.syntax_grammar, self.lexical_grammar); + self.populate_used_symbols(); - Err(Error::grammar("oh no")) + let (main_lex_table, keyword_lex_table, keyword_capture_token) = lex_table_builder.build(); + Ok(( + self.parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + )) } fn add_parse_state( @@ -82,6 +94,7 @@ impl<'a> ParseTableBuilder<'a> { let state_id = self.parse_table.states.len(); self.item_sets_by_state_id.push(v.key().clone()); self.parse_table.states.push(ParseState { + lex_state_id: 0, terminal_entries: HashMap::new(), nonterminal_entries: HashMap::new(), }); @@ -98,12 +111,16 @@ impl<'a> ParseTableBuilder<'a> { fn process_part_state_queue(&mut self) -> Result<()> { while let Some(entry) = self.parse_state_queue.pop_front() { - println!( - "ITEM SET {}:\n{}", - entry.state_id, - self.item_sets_by_state_id[entry.state_id] - .display_with(&self.syntax_grammar, &self.lexical_grammar,) - ); + let debug = false; + + if debug { + println!( + "ITEM SET {}:\n{}", + entry.state_id, + self.item_sets_by_state_id[entry.state_id] + .display_with(&self.syntax_grammar, &self.lexical_grammar,) + ); + } let item_set = self.item_set_builder.transitive_closure( &self.item_sets_by_state_id[entry.state_id], @@ -111,11 +128,12 @@ impl<'a> ParseTableBuilder<'a> { self.inlines, ); - // println!("TRANSITIVE CLOSURE:"); - // for item in item_set.entries.keys() { - // println!("{}", item.display_with(&self.syntax_grammar, &self.lexical_grammar, &self.item_set_builder.inlines)); - // } - // println!(""); + if debug { + println!( + "TRANSITIVE CLOSURE:\n{}", + item_set.display_with(&self.syntax_grammar, &self.lexical_grammar) + ); + } self.add_actions( entry.preceding_symbols, @@ -249,6 +267,17 @@ impl<'a> ParseTableBuilder<'a> { )?; } + let state = &mut self.parse_table.states[state_id]; + for extra_token in 
&self.syntax_grammar.extra_tokens { + state + .terminal_entries + .entry(*extra_token) + .or_insert(ParseTableEntry { + reusable: true, + actions: vec![ParseAction::ShiftExtra], + }); + } + Ok(()) } @@ -514,6 +543,7 @@ impl<'a> ParseTableBuilder<'a> { non_terminal_usages[symbol.index] = true; } } + self.parse_table.symbols.push(Symbol::end()); for (i, value) in terminal_usages.into_iter().enumerate() { if value { self.parse_table.symbols.push(Symbol::terminal(i)); @@ -532,12 +562,15 @@ impl<'a> ParseTableBuilder<'a> { } fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { - let alias_sequence = item + let mut alias_sequence: Vec> = item .production .steps .iter() .map(|s| s.alias.clone()) .collect(); + while alias_sequence.last() == Some(&None) { + alias_sequence.pop(); + } if let Some(index) = self .parse_table .alias_sequences diff --git a/src/render/mod.rs b/src/render/mod.rs index 2ca610a6..fc4cdafb 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,8 +1,16 @@ -use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::nfa::CharacterSet; use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; -use crate::tables::{LexTable, ParseTable, ParseTableEntry}; +use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; use std::collections::{HashMap, HashSet}; use std::fmt::Write; +use std::mem::swap; + +macro_rules! add { + ($this: tt, $($arg: tt)*) => {{ + $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); + }} +} macro_rules! add_line { ($this: tt, $($arg: tt)*) => { @@ -14,10 +22,21 @@ macro_rules! add_line { } } +macro_rules! indent { + ($this: tt) => { + $this.indent_level += 1; + }; +} + +macro_rules! 
dedent { + ($this: tt) => { + $this.indent_level -= 1; + }; +} + struct Generator { buffer: String, indent_level: usize, - language_name: String, parse_table: ParseTable, main_lex_table: LexTable, @@ -27,9 +46,9 @@ struct Generator { lexical_grammar: LexicalGrammar, simple_aliases: AliasMap, symbol_ids: HashMap, - parse_table_entries: Vec<(usize, ParseTableEntry)>, - next_parse_action_list_index: usize, - unique_aliases: HashSet, + alias_ids: HashMap, + external_scanner_states: Vec>, + alias_map: HashMap>, } impl Generator { @@ -39,6 +58,30 @@ impl Generator { self.add_stats(); self.add_symbol_enum(); self.add_symbol_names_list(); + self.add_symbol_metadata_list(); + self.add_alias_sequences(); + + let mut main_lex_table = LexTable::default(); + swap(&mut main_lex_table, &mut self.main_lex_table); + self.add_lex_function("ts_lex", main_lex_table); + + if self.keyword_capture_token.is_some() { + let mut keyword_lex_table = LexTable::default(); + swap(&mut keyword_lex_table, &mut self.keyword_lex_table); + self.add_lex_function("ts_lex_keywords", keyword_lex_table); + } + + self.add_lex_modes_list(); + + if !self.syntax_grammar.external_tokens.is_empty() { + self.add_external_token_enum(); + self.add_external_scanner_symbol_map(); + self.add_external_scanner_states_list(); + } + + self.add_parse_table(); + self.add_parser_export(); + self.buffer } @@ -50,7 +93,10 @@ impl Generator { fn add_pragmas(&mut self) { add_line!(self, "#if defined(__GNUC__) || defined(__clang__)"); add_line!(self, "#pragma GCC diagnostic push"); - add_line!(self, "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""); + add_line!( + self, + "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"" + ); add_line!(self, "#endif"); add_line!(self, ""); @@ -67,81 +113,639 @@ impl Generator { } fn add_stats(&mut self) { - let mut token_count = 0; - - for symbol in &self.parse_table.symbols { - if symbol.is_terminal() { - token_count += 1; - } else if symbol.is_external() { - 
let external_token = &self.syntax_grammar.external_tokens[symbol.index]; - if external_token.corresponding_internal_token.is_none() { - token_count += 1; + let token_count = self + .parse_table + .symbols + .iter() + .filter(|symbol| { + if symbol.is_terminal() { + true + } else if symbol.is_external() { + self.syntax_grammar.external_tokens[symbol.index] + .corresponding_internal_token + .is_none() + } else { + false } - } + }) + .count(); + + let mut symbol_identifiers = HashSet::new(); + for i in 0..self.parse_table.symbols.len() { + self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } for alias_sequence in &self.parse_table.alias_sequences { for entry in alias_sequence { if let Some(alias) = entry { - self.unique_aliases.insert(alias.clone()); + let alias_kind = if alias.is_named { + VariableType::Named + } else { + VariableType::Anonymous + }; + let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias_kind + }); + let alias_id = if let Some(symbol) = matching_symbol { + self.symbol_ids[&symbol].clone() + } else if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + self.alias_ids.entry(alias.clone()).or_insert(alias_id); + self.alias_map + .entry(alias.clone()) + .or_insert(matching_symbol); } } } - let mut symbol_id_values = HashSet::new(); - for i in 0..self.parse_table.symbols.len() { - self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_id_values); - } - add_line!(self, "#define LANGUAGE_VERSION {}", 6); - add_line!(self, "#define STATE_COUNT {}", self.parse_table.states.len()); - add_line!(self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len()); - add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len()); + add_line!( + self, + "#define STATE_COUNT {}", + 
self.parse_table.states.len() + ); + add_line!( + self, + "#define SYMBOL_COUNT {}", + self.parse_table.symbols.len() + ); + add_line!( + self, + "#define ALIAS_COUNT {}", + self.alias_map.iter().filter(|e| e.1.is_none()).count() + ); add_line!(self, "#define TOKEN_COUNT {}", token_count); - add_line!(self, "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len()); - // add_line!(self, "#define MAX_ALIAS_SEQUENCE_LENGTH {}\n", self.parse_table.max_alias_sequence_length); + add_line!( + self, + "#define EXTERNAL_TOKEN_COUNT {}", + self.syntax_grammar.external_tokens.len() + ); + if let Some(max_alias_sequence_length) = self + .parse_table + .alias_sequences + .iter() + .map(|seq| seq.len()) + .max() + { + add_line!( + self, + "#define MAX_ALIAS_SEQUENCE_LENGTH {}", + max_alias_sequence_length + ); + } add_line!(self, ""); } fn add_symbol_enum(&mut self) { add_line!(self, "enum {{"); - self.indent(); - for i in 0..self.parse_table.symbols.len() { - let symbol = self.parse_table.symbols[i]; - if symbol != Symbol::end() { - add_line!(self, "{} = {}", self.symbol_ids[&symbol], i); + indent!(self); + let mut i = 1; + for symbol in self.parse_table.symbols.iter() { + if *symbol != Symbol::end() { + add_line!(self, "{} = {},", self.symbol_ids[&symbol], i); + i += 1; } } - self.dedent(); + for (alias, symbol) in &self.alias_map { + if symbol.is_none() { + add_line!(self, "{} = {},", self.alias_ids[&alias], i); + } + i += 1; + } + dedent!(self); add_line!(self, "}};"); add_line!(self, ""); } fn add_symbol_names_list(&mut self) { add_line!(self, "static const char *ts_symbol_names[] = {{"); - self.indent(); - self.dedent(); + indent!(self); + for symbol in self.parse_table.symbols.iter() { + if *symbol != Symbol::end() { + add_line!( + self, + "[{}] = \"{}\",", + self.symbol_ids[&symbol], + self.sanitize_string(self.metadata_for_symbol(*symbol).0) + ); + } + } + for (alias, symbol) in &self.alias_map { + if symbol.is_none() { + add_line!( + self, + "[{}] = 
\"{}\",", + self.alias_ids[&alias], + self.sanitize_string(&alias.value) + ); + } + } + dedent!(self); add_line!(self, "}};"); add_line!(self, ""); } - fn assign_symbol_id(&mut self, symbol: Symbol, used_ids: &mut HashSet) { + fn add_symbol_metadata_list(&mut self) { + add_line!( + self, + "static const TSSymbolMetadata ts_symbol_metadata[] = {{" + ); + indent!(self); + for symbol in &self.parse_table.symbols { + add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); + indent!(self); + match self.metadata_for_symbol(*symbol).1 { + VariableType::Named => { + add_line!(self, ".visible = true,"); + add_line!(self, ".named = true,"); + } + VariableType::Anonymous => { + add_line!(self, ".visible = true,"); + add_line!(self, ".named = false,"); + } + VariableType::Hidden => { + add_line!(self, ".visible = false,"); + add_line!(self, ".named = true,"); + } + VariableType::Auxiliary => { + add_line!(self, ".visible = false,"); + add_line!(self, ".named = false,"); + } + } + dedent!(self); + add_line!(self, "}},"); + } + for (alias, matching_symbol) in &self.alias_map { + if matching_symbol.is_none() { + add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + indent!(self); + add_line!(self, ".visible = true,"); + add_line!(self, ".named = {},", alias.is_named); + dedent!(self); + add_line!(self, "}},"); + } + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_alias_sequences(&mut self) { + add_line!( + self, + "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", + self.parse_table.alias_sequences.len() + ); + indent!(self); + for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) { + add_line!(self, "[{}] = {{", i); + indent!(self); + for (j, alias) in sequence.iter().enumerate() { + if let Some(alias) = alias { + add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); + } + } + dedent!(self); + add_line!(self, "}},"); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } 
+ + fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { + add_line!( + self, + "static bool {}(TSLexer *lexer, TSStateId state) {{", + name + ); + indent!(self); + add_line!(self, "START_LEXER();"); + add_line!(self, "switch (state) {{"); + indent!(self); + + for (i, state) in lex_table.states.into_iter().enumerate() { + add_line!(self, "case {}:", i); + indent!(self); + self.add_lex_state(state); + dedent!(self); + } + + add_line!(self, "default:"); + indent!(self); + add_line!(self, "return false;"); + dedent!(self); + + dedent!(self); + add_line!(self, "}}"); + dedent!(self); + add_line!(self, "}}"); + add_line!(self, ""); + } + + fn add_lex_state(&mut self, state: LexState) { + if let Some(accept_action) = state.accept_action { + add_line!( + self, + "ACCEPT_TOKEN({})", + self.symbol_ids[&accept_action.symbol] + ); + } + + let mut ruled_out_characters = HashSet::new(); + for (characters, action) in state.advance_actions { + let previous_length = self.buffer.len(); + + add!(self, "if ("); + if self.add_character_set_condition(&characters, &ruled_out_characters) { + add!(self, ")"); + indent!(self); + if action.in_main_token { + add_line!(self, "ADVANCE({});", action.state); + } else { + add_line!(self, "SKIP({});", action.state); + } + if let CharacterSet::Include(chars) = characters { + ruled_out_characters.extend(chars.iter()); + } + dedent!(self); + } else { + self.buffer.truncate(previous_length); + } + } + + add_line!(self, "END_STATE();"); + } + + fn add_character_set_condition( + &mut self, + characters: &CharacterSet, + ruled_out_characters: &HashSet, + ) -> bool { + true + } + + fn add_lex_modes_list(&mut self) { + self.get_external_scanner_state_id(HashSet::new()); + + let mut external_tokens_by_corresponding_internal_token = HashMap::new(); + for (i, external_token) in self.syntax_grammar.external_tokens.iter().enumerate() { + if let Some(symbol) = external_token.corresponding_internal_token { + 
external_tokens_by_corresponding_internal_token.insert(symbol.index, i); + } + } + + add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{"); + indent!(self); + for i in 0..self.parse_table.states.len() { + let mut external_tokens = HashSet::new(); + for token in self.parse_table.states[i].terminal_entries.keys() { + if token.is_external() { + external_tokens.insert(token.index); + } else if token.is_terminal() { + if let Some(external_index) = + external_tokens_by_corresponding_internal_token.get(&token.index) + { + external_tokens.insert(*external_index); + } + } + } + + let external_state_id = self.get_external_scanner_state_id(external_tokens); + let state = &self.parse_table.states[i]; + if external_state_id > 0 { + add_line!( + self, + "[{}] = {{.lex_state = {}, .external_lex_state = {}}},", + i, + state.lex_state_id, + external_state_id + ); + } else { + add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id); + } + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_external_token_enum(&mut self) { + add_line!(self, "enum {{"); + indent!(self); + for i in 0..self.syntax_grammar.external_tokens.len() { + add_line!( + self, + "{} = {},", + self.external_token_id(&self.syntax_grammar.external_tokens[i]), + i + ); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_external_scanner_symbol_map(&mut self) { + add_line!( + self, + "static TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{" + ); + indent!(self); + for i in 0..self.syntax_grammar.external_tokens.len() { + add_line!( + self, + "[{}] = {},", + self.external_token_id(&self.syntax_grammar.external_tokens[i]), + self.symbol_ids[&Symbol::external(i)], + ); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_external_scanner_states_list(&mut self) { + add_line!( + self, + "static bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{", + 
self.external_scanner_states.len(), + ); + indent!(self); + for i in 0..self.external_scanner_states.len() { + if !self.external_scanner_states[i].is_empty() { + add_line!(self, "[{}] = {{", i); + indent!(self); + for token_index in &self.external_scanner_states[i] { + add_line!( + self, + "[{}] = true,", + self.external_token_id(&self.syntax_grammar.external_tokens[*token_index]) + ); + } + dedent!(self); + add_line!(self, "}},"); + } + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_parse_table(&mut self) { + let mut parse_table_entries = Vec::new(); + let mut next_parse_action_list_index = 0; + + self.get_parse_action_list_id( + &ParseTableEntry { + actions: Vec::new(), + reusable: false, + }, + &mut parse_table_entries, + &mut next_parse_action_list_index, + ); + + add_line!( + self, + "static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {{" + ); + indent!(self); + for (i, state) in self.parse_table.states.iter().enumerate() { + add_line!(self, "[{}] = {{", i); + indent!(self); + for (symbol, state_id) in &state.nonterminal_entries { + add_line!(self, "[{}] = STATE({}),", self.symbol_ids[symbol], state_id); + } + for (symbol, entry) in &state.terminal_entries { + let entry_id = self.get_parse_action_list_id( + entry, + &mut parse_table_entries, + &mut next_parse_action_list_index, + ); + add_line!( + self, + "[{}] = ACTIONS({}),", + self.symbol_ids[symbol], + entry_id + ); + } + dedent!(self); + add_line!(self, "}},"); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + + self.add_parse_action_list(parse_table_entries); + } + + fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) { + add_line!(self, "static TSParseActionEntry ts_parse_actions[] = {{"); + indent!(self); + for (i, entry) in parse_table_entries { + add!( + self, + " [{}] = {{.count = {}, .reusable = {}}},", + i, + entry.actions.len(), + entry.reusable + ); + for action in entry.actions { + add!(self, 
" "); + match action { + ParseAction::Accept => add!(self, " ACCEPT_INPUT()"), + ParseAction::Recover => add!(self, "RECOVER()"), + ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"), + ParseAction::Shift { + state, + is_repetition, + } => { + if is_repetition { + add!(self, "SHIFT_REPEAT({})", state); + } else { + add!(self, "SHIFT({})", state); + } + } + ParseAction::Reduce { + symbol, + child_count, + dynamic_precedence, + alias_sequence_id, + .. + } => { + if !self.symbol_ids.contains_key(&symbol) { + eprintln!( + "SYMBOL: {:?} {:?}", + symbol, + self.metadata_for_symbol(symbol) + ); + } + add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); + if dynamic_precedence != 0 { + add!(self, ", .dynamic_precedence = {}", dynamic_precedence); + } + if alias_sequence_id != 0 { + add!(self, ", .alias_sequence_id = {}", alias_sequence_id); + } + add!(self, ")"); + } + } + add!(self, ",") + } + add!(self, "\n"); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_parser_export(&mut self) { + let language_function_name = format!("tree_sitter_{}", self.language_name); + let external_scanner_name = format!("{}_external_scanner", language_function_name); + + if !self.syntax_grammar.external_tokens.is_empty() { + add_line!(self, "void *{}_create();", external_scanner_name); + add_line!(self, "void {}_destroy(void *);", external_scanner_name); + add_line!( + self, + "bool {}_scan(void *, TSLexer *, const bool *);", + external_scanner_name + ); + add_line!( + self, + "unsigned {}_serialize(void *, char *);", + external_scanner_name + ); + add_line!( + self, + "void {}_deserialize(void *, const char *, unsigned);", + external_scanner_name + ); + add_line!(self, ""); + } + + add_line!(self, "#ifdef _WIN32"); + add_line!(self, "#define extern __declspec(dllexport)"); + add_line!(self, "#endif"); + add_line!(self, ""); + + add_line!( + self, + "extern const TSLanguage *{}() {{", + language_function_name + ); + indent!(self); + 
add_line!(self, "static TSLanguage language = {{"); + indent!(self); + add_line!(self, ".version = LANGUAGE_VERSION,"); + add_line!(self, ".symbol_count = SYMBOL_COUNT,"); + add_line!(self, ".alias_count = ALIAS_COUNT,"); + add_line!(self, ".token_count = TOKEN_COUNT,"); + add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); + add_line!( + self, + ".parse_table = (const unsigned short *)ts_parse_table," + ); + add_line!(self, ".parse_actions = ts_parse_actions,"); + add_line!(self, ".lex_modes = ts_lex_modes,"); + add_line!(self, ".symbol_names = ts_symbol_names,"); + add_line!( + self, + ".alias_sequences = (const TSSymbol *)ts_alias_sequences," + ); + + add_line!( + self, + ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," + ); + add_line!(self, ".lex_fn = ts_lex,"); + + if let Some(keyword_capture_token) = self.keyword_capture_token { + add_line!(self, ".keyword_lex_fn = ts_lex_keywords,"); + add_line!( + self, + ".keyword_capture_token = {},", + self.symbol_ids[&keyword_capture_token] + ); + } + + add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); + + if !self.syntax_grammar.external_tokens.is_empty() { + add_line!(self, ".external_scanner = {{"); + indent!(self); + add_line!(self, "(const bool *)ts_external_scanner_states,"); + add_line!(self, "ts_external_scanner_symbol_map,"); + add_line!(self, "{}_create,", external_scanner_name); + add_line!(self, "{}_destroy,", external_scanner_name); + add_line!(self, "{}_scan,", external_scanner_name); + add_line!(self, "{}_serialize,", external_scanner_name); + add_line!(self, "{}_deserialize,", external_scanner_name); + dedent!(self); + add_line!(self, "}},"); + } + dedent!(self); + + add_line!(self, "}};"); + add_line!(self, "return &language;"); + dedent!(self); + add_line!(self, "}}"); + } + + fn get_parse_action_list_id( + &self, + entry: &ParseTableEntry, + parse_table_entries: &mut Vec<(usize, ParseTableEntry)>, + next_parse_action_list_index: &mut usize, + ) -> usize { + if let 
Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) { + return *index; + } + + let result = *next_parse_action_list_index; + parse_table_entries.push((result, entry.clone())); + *next_parse_action_list_index += 1 + entry.actions.len(); + result + } + + fn get_external_scanner_state_id(&mut self, external_tokens: HashSet) -> usize { + self.external_scanner_states + .iter() + .position(|tokens| *tokens == external_tokens) + .unwrap_or_else(|| { + self.external_scanner_states.push(external_tokens); + self.external_scanner_states.len() - 1 + }) + } + + fn external_token_id(&self, token: &ExternalToken) -> String { + format!( + "ts_external_token_{}", + self.sanitize_identifier(&token.name) + ) + } + + fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet) { let mut id; if symbol == Symbol::end() { id = "ts_builtin_sym_end".to_string(); } else { let (name, kind) = self.metadata_for_symbol(symbol); id = match kind { - VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_name(name)), - VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_name(name)), + VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)), + VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)), VariableType::Hidden | VariableType::Named => { - format!("sym_{}", self.sanitize_name(name)) + format!("sym_{}", self.sanitize_identifier(name)) } }; let mut suffix_number = 1; let mut suffix = String::new(); - while used_ids.contains(&id) { + while used_identifiers.contains(&id) { id.drain(id.len() - suffix.len()..); suffix_number += 1; suffix = suffix_number.to_string(); @@ -149,7 +753,7 @@ impl Generator { } } - used_ids.insert(id.clone()); + used_identifiers.insert(id.clone()); self.symbol_ids.insert(symbol, id); } @@ -171,16 +775,67 @@ impl Generator { } } - fn sanitize_name(&self, name: &str) -> String { - name.to_string() + fn sanitize_identifier(&self, name: &str) -> String { + let mut 
result = String::with_capacity(name.len()); + for c in name.chars() { + if ('a' <= c && c <= 'z') + || ('A' <= c && c <= 'Z') + || ('0' <= c && c <= '9') + || c == '_' + { + result.push(c); + } else { + result += match c { + '~' => "TILDE", + '`' => "BQUOTE", + '!' => "BANG", + '@' => "AT", + '#' => "POUND", + '$' => "DOLLAR", + '%' => "PERCENT", + '^' => "CARET", + '&' => "AMP", + '*' => "STAR", + '(' => "LPAREN", + ')' => "RPAREN", + '-' => "DASH", + '+' => "PLUS", + '=' => "EQ", + '{' => "LBRACE", + '}' => "RBRACE", + '[' => "LBRACK", + ']' => "RBRACK", + '\\' => "BSLASH", + '|' => "PIPE", + ':' => "COLON", + ';' => "SEMI", + '"' => "DQUOTE", + '\'' => "SQUOTE", + '<' => "LT", + '>' => "GT", + ',' => "COMMA", + '.' => "DOT", + '?' => "QMARK", + '/' => "SLASH", + '\n' => "LF", + '\r' => "CR", + '\t' => "TAB", + _ => continue, + } + } + } + result } - fn indent(&mut self) { - self.indent_level += 1; - } - - fn dedent(&mut self) { - self.indent_level -= 1; + fn sanitize_string(&self, name: &str) -> String { + let mut result = String::with_capacity(name.len()); + for c in name.chars() { + if ['\\', '\n', '\r', '\"'].contains(&c) { + result.push('\\'); + } + result.push(c); + } + result } } @@ -206,9 +861,9 @@ pub(crate) fn render_c_code( lexical_grammar, simple_aliases, symbol_ids: HashMap::new(), - parse_table_entries: Vec::new(), - next_parse_action_list_index: 0, - unique_aliases: HashSet::new(), + alias_ids: HashMap::new(), + external_scanner_states: Vec::new(), + alias_map: HashMap::new(), } .generate() } diff --git a/src/tables.rs b/src/tables.rs index 9100b81e..01cecb49 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::ops::Range; use crate::rules::{Associativity, Symbol, Alias}; +use crate::nfa::CharacterSet; pub(crate) type AliasSequenceId = usize; pub(crate) type ParseStateId = usize; @@ -34,7 +35,8 @@ pub(crate) struct ParseTableEntry { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseState 
{ pub terminal_entries: HashMap, - pub nonterminal_entries: HashMap + pub nonterminal_entries: HashMap, + pub lex_state_id: usize, } #[derive(Debug, PartialEq, Eq)] @@ -60,7 +62,7 @@ pub(crate) struct AcceptTokenAction { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct LexState { - pub advance_actions: HashMap, + pub advance_actions: HashMap, pub accept_action: Option, } @@ -78,6 +80,12 @@ impl ParseTableEntry { } } +impl Default for LexTable { + fn default() -> Self { + LexTable { states: Vec::new() } + } +} + impl ParseAction { pub fn precedence(&self) -> i32 { if let ParseAction::Reduce { precedence, .. } = self { From 479400e5d3e7fdc1395868c0f19fe6415cb68bda Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 29 Dec 2018 13:56:00 -0800 Subject: [PATCH 077/208] Add handling of precedence within tokens --- src/nfa.rs | 366 +++++++++++++++++- src/prepare_grammar/expand_tokens.rs | 557 +++++++++++++++------------ src/prepare_grammar/mod.rs | 14 +- 3 files changed, 670 insertions(+), 267 deletions(-) diff --git a/src/nfa.rs b/src/nfa.rs index f6acb67a..4a4fa17b 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -1,5 +1,8 @@ -use std::fmt; use std::char; +use std::cmp::max; +use std::cmp::Ordering; +use std::fmt; +use std::mem::swap; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum CharacterSet { @@ -13,14 +16,18 @@ pub enum NfaState { chars: CharacterSet, state_id: u32, is_sep: bool, + precedence: i32, }, Split(u32, u32), - Accept(usize), + Accept { + variable_index: usize, + precedence: i32, + }, } #[derive(PartialEq, Eq)] pub struct Nfa { - pub states: Vec + pub states: Vec, } impl Default for Nfa { @@ -78,14 +85,57 @@ impl CharacterSet { } } - pub fn add(self, other: CharacterSet) -> Self { - if let (CharacterSet::Include(mut chars), CharacterSet::Include(other_chars)) = (self, other) { - chars.extend(other_chars); - chars.sort_unstable(); - chars.dedup(); - CharacterSet::Include(chars) + pub fn add(self, other: &CharacterSet) -> Self { + if let 
CharacterSet::Include(other_chars) = other { + if let CharacterSet::Include(mut chars) = self { + chars.extend(other_chars); + chars.sort_unstable(); + chars.dedup(); + return CharacterSet::Include(chars); + } + } + panic!("Called add with a negated character set"); + } + + pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet { + match self { + CharacterSet::Include(chars) => match other { + CharacterSet::Include(other_chars) => { + CharacterSet::Include(remove_chars(chars, other_chars, true)) + } + CharacterSet::Exclude(other_chars) => { + let mut removed = remove_chars(chars, other_chars, false); + add_chars(other_chars, chars); + swap(&mut removed, chars); + CharacterSet::Include(removed) + } + }, + CharacterSet::Exclude(chars) => match other { + CharacterSet::Include(other_chars) => { + let mut removed = remove_chars(other_chars, chars, false); + add_chars(chars, other_chars); + swap(&mut removed, other_chars); + CharacterSet::Include(removed) + } + CharacterSet::Exclude(other_chars) => { + let removed = remove_chars(chars, other_chars, true); + let mut included_characters = Vec::new(); + let mut other_included_characters = Vec::new(); + swap(&mut included_characters, other_chars); + swap(&mut other_included_characters, chars); + *self = CharacterSet::Include(included_characters); + *other = CharacterSet::Include(other_included_characters); + CharacterSet::Exclude(removed) + } + }, + } + } + + pub fn is_empty(&self) -> bool { + if let CharacterSet::Include(c) = self { + c.is_empty() } else { - panic!("Called add with a negated character set"); + false } } @@ -97,6 +147,84 @@ impl CharacterSet { } } +impl Ord for CharacterSet { + fn cmp(&self, other: &CharacterSet) -> Ordering { + match self { + CharacterSet::Include(chars) => { + if let CharacterSet::Include(other_chars) = other { + compare_chars(chars, other_chars) + } else { + Ordering::Less + } + } + CharacterSet::Exclude(chars) => { + if let CharacterSet::Exclude(other_chars) = 
other { + compare_chars(chars, other_chars) + } else { + Ordering::Greater + } + } + } + } +} + +impl PartialOrd for CharacterSet { + fn partial_cmp(&self, other: &CharacterSet) -> Option { + Some(self.cmp(other)) + } +} + +fn add_chars(left: &mut Vec, right: &Vec) { + for c in right { + match left.binary_search(c) { + Err(i) => left.insert(i, *c), + _ => {} + } + } +} + +fn remove_chars(left: &mut Vec, right: &mut Vec, mutate_right: bool) -> Vec { + let mut result = Vec::new(); + right.retain(|right_char| { + if let Some(index) = left.iter().position(|left_char| *left_char == *right_char) { + left.remove(index); + result.push(*right_char); + false || !mutate_right + } else { + true + } + }); + result +} + +fn compare_chars(chars: &Vec, other_chars: &Vec) -> Ordering { + if chars.is_empty() { + if other_chars.is_empty() { + Ordering::Equal + } else { + Ordering::Less + } + } else if other_chars.is_empty() { + Ordering::Greater + } else { + let mut other_c = other_chars.iter(); + for c in chars.iter() { + if let Some(other_c) = other_c.next() { + let cmp = c.cmp(other_c); + if cmp != Ordering::Equal { + return cmp; + } + } else { + return Ordering::Greater; + } + } + if other_c.next().is_some() { + return Ordering::Less; + } + Ordering::Equal + } +} + impl Nfa { pub fn new() -> Self { Nfa { states: Vec::new() } @@ -124,17 +252,32 @@ impl fmt::Debug for Nfa { impl<'a> NfaCursor<'a> { pub fn new(nfa: &'a Nfa, mut states: Vec) -> Self { - let mut result = Self { nfa, state_ids: Vec::new(), in_sep: true }; + let mut result = Self { + nfa, + state_ids: Vec::new(), + in_sep: true, + }; result.add_states(&mut states); result } + pub fn reset(&mut self, mut states: Vec) { + self.state_ids.clear(); + self.add_states(&mut states); + } + pub fn advance(&mut self, c: char) -> bool { let mut result = false; let mut new_state_ids = Vec::new(); let mut any_sep_transitions = false; for current_state_id in &self.state_ids { - if let NfaState::Advance { chars, state_id, is_sep } = 
&self.nfa.states[*current_state_id as usize] { + if let NfaState::Advance { + chars, + state_id, + is_sep, + .. + } = &self.nfa.states[*current_state_id as usize] + { if chars.contains(c) { if *is_sep { any_sep_transitions = true; @@ -152,16 +295,68 @@ impl<'a> NfaCursor<'a> { result } - pub fn finished_id(&self) -> Option { + pub fn successors(&self) -> impl Iterator { + self.state_ids.iter().filter_map(move |id| { + if let NfaState::Advance { + chars, + state_id, + precedence, + .. + } = &self.nfa.states[*id as usize] + { + Some((chars, *precedence, *state_id)) + } else { + None + } + }) + } + + pub fn grouped_successors(&self) -> Vec<(CharacterSet, i32, Vec)> { + Self::group_successors(self.successors()) + } + + fn group_successors<'b>( + iter: impl Iterator, + ) -> Vec<(CharacterSet, i32, Vec)> { + let mut result: Vec<(CharacterSet, i32, Vec)> = Vec::new(); + for (chars, prec, state) in iter { + let mut chars = chars.clone(); + let mut i = 0; + while i < result.len() { + let intersection = result[i].0.remove_intersection(&mut chars); + if !intersection.is_empty() { + let mut states = result[i].2.clone(); + let mut precedence = result[i].1; + states.push(state); + result.insert(i, (intersection, max(precedence, prec), states)); + i += 1; + } + i += 1; + } + if !chars.is_empty() { + result.push((chars, prec, vec![state])); + } + } + result.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + result + } + + pub fn finished_id(&self) -> Option<(usize, i32)> { let mut result = None; for state_id in self.state_ids.iter() { - if let NfaState::Accept(id) = self.nfa.states[*state_id as usize] { + if let NfaState::Accept { + variable_index, + precedence, + } = self.nfa.states[*state_id as usize] + { match result { - None => { - result = Some(id) - }, - Some(existing_id) => if id < existing_id { - result = Some(id) + None => result = Some((variable_index, precedence)), + Some((existing_id, existing_precedence)) => { + if precedence > existing_precedence + || (precedence == 
existing_precedence && variable_index < existing_id) + { + result = Some((variable_index, precedence)) + } } } } @@ -202,3 +397,136 @@ impl<'a> NfaCursor<'a> { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_group_successors() { + let table = [ + ( + vec![ + (CharacterSet::empty().add_range('a', 'f'), 0, 1), + (CharacterSet::empty().add_range('d', 'i'), 1, 2), + ], + vec![ + (CharacterSet::empty().add_range('a', 'c'), 0, vec![1]), + (CharacterSet::empty().add_range('d', 'f'), 1, vec![1, 2]), + (CharacterSet::empty().add_range('g', 'i'), 1, vec![2]), + ], + ), + ( + vec![ + (CharacterSet::empty().add_range('a', 'z'), 0, 1), + (CharacterSet::empty().add_char('d'), 0, 2), + (CharacterSet::empty().add_char('i'), 0, 3), + (CharacterSet::empty().add_char('f'), 0, 4), + ], + vec![ + ( + CharacterSet::empty() + .add_range('a', 'c') + .add_char('e') + .add_range('g', 'h') + .add_range('j', 'z'), + 0, + vec![1], + ), + (CharacterSet::empty().add_char('d'), 0, vec![1, 2]), + (CharacterSet::empty().add_char('f'), 0, vec![1, 4]), + (CharacterSet::empty().add_char('i'), 0, vec![1, 3]), + ], + ), + ]; + + for row in table.iter() { + assert_eq!( + NfaCursor::group_successors(row.0.iter().map(|(c, p, s)| (c, *p, *s))), + row.1 + ); + } + + // let successors = NfaCursor::group_successors( + // [ + // (&CharacterSet::empty().add_range('a', 'f'), 1), + // (&CharacterSet::empty().add_range('d', 'i'), 2), + // ] + // .iter() + // .cloned(), + // ); + // + // assert_eq!( + // successors, + // vec![ + // (CharacterSet::empty().add_range('a', 'c'), vec![1],), + // (CharacterSet::empty().add_range('d', 'f'), vec![1, 2],), + // (CharacterSet::empty().add_range('g', 'i'), vec![2],), + // ] + // ); + } + + #[test] + fn test_character_set_intersection() { + // whitelist - whitelist + // both sets contain 'c', 'd', and 'f' + let mut a = CharacterSet::empty().add_range('a', 'f'); + let mut b = CharacterSet::empty().add_range('c', 'h'); + assert_eq!( + 
a.remove_intersection(&mut b), + CharacterSet::empty().add_range('c', 'f') + ); + assert_eq!(a, CharacterSet::empty().add_range('a', 'b')); + assert_eq!(b, CharacterSet::empty().add_range('g', 'h')); + + let mut a = CharacterSet::empty().add_range('a', 'f'); + let mut b = CharacterSet::empty().add_range('c', 'h'); + assert_eq!( + b.remove_intersection(&mut a), + CharacterSet::empty().add_range('c', 'f') + ); + assert_eq!(a, CharacterSet::empty().add_range('a', 'b')); + assert_eq!(b, CharacterSet::empty().add_range('g', 'h')); + + // whitelist - blacklist + // both sets contain 'e', 'f', and 'm' + let mut a = CharacterSet::empty() + .add_range('c', 'h') + .add_range('k', 'm'); + let mut b = CharacterSet::empty() + .add_range('a', 'd') + .add_range('g', 'l') + .negate(); + assert_eq!( + a.remove_intersection(&mut b), + CharacterSet::Include(vec!['e', 'f', 'm']) + ); + assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); + assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); + + let mut a = CharacterSet::empty() + .add_range('c', 'h') + .add_range('k', 'm'); + let mut b = CharacterSet::empty() + .add_range('a', 'd') + .add_range('g', 'l') + .negate(); + assert_eq!( + b.remove_intersection(&mut a), + CharacterSet::Include(vec!['e', 'f', 'm']) + ); + assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); + assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); + + // blacklist - blacklist + // both sets exclude 'c', 'd', and 'e' + let mut a = CharacterSet::empty().add_range('a', 'e').negate(); + let mut b = CharacterSet::empty().add_range('c', 'h').negate(); + assert_eq!( + a.remove_intersection(&mut b), + CharacterSet::Exclude(vec!['c', 'd', 'e']) + ); + assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); + assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); + } +} diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 5ee9861f..b0d2ae04 100644 --- 
a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -7,8 +7,18 @@ use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, }; -pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { - let mut nfa = Nfa::new(); +struct NfaBuilder { + nfa: Nfa, + is_sep: bool, + precedence_stack: Vec, +} + +pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { + let mut builder = NfaBuilder { + nfa: Nfa::new(), + is_sep: true, + precedence_stack: vec![0], + }; let separator_rule = if grammar.separators.len() > 0 { grammar.separators.push(Rule::Blank); @@ -24,281 +34,325 @@ pub(super) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result false, }; - nfa.states.push(NfaState::Accept(i)); - let last_state_id = nfa.last_state_id(); - expand_rule(&variable.rule, &mut nfa, last_state_id, false).map_err(|e| match e { - Error::RegexError(msg) => Error::RegexError(format!("Rule {} {}", variable.name, msg)), - _ => e, - })?; + builder.is_sep = false; + builder.nfa.states.push(NfaState::Accept { + variable_index: i, + precedence: 0, + }); + let last_state_id = builder.nfa.last_state_id(); + builder + .expand_rule(&variable.rule, last_state_id) + .map_err(|e| match e { + Error::RegexError(msg) => { + Error::RegexError(format!("Rule {} {}", variable.name, msg)) + } + _ => e, + })?; if !is_immediate_token { - let last_state_id = nfa.last_state_id(); - expand_rule(&separator_rule, &mut nfa, last_state_id, true)?; + builder.is_sep = true; + let last_state_id = builder.nfa.last_state_id(); + builder.expand_rule(&separator_rule, last_state_id)?; } variables.push(LexicalVariable { name: variable.name, kind: variable.kind, - start_state: nfa.last_state_id(), + start_state: builder.nfa.last_state_id(), }); } - Ok(LexicalGrammar { nfa, variables }) + Ok(LexicalGrammar { + nfa: builder.nfa, + variables, + }) } -fn expand_rule(rule: &Rule, nfa: &mut Nfa, mut 
next_state_id: u32, is_sep: bool) -> Result { - match rule { - Rule::Pattern(s) => { - let ast = parse::Parser::new() - .parse(&s) - .map_err(|e| Error::GrammarError(e.to_string()))?; - expand_regex(&ast, nfa, next_state_id, is_sep) - } - Rule::String(s) => { - for c in s.chars().rev() { - nfa.prepend(|last_state_id| NfaState::Advance { - chars: CharacterSet::empty().add_char(c), - state_id: last_state_id, - is_sep, - }); +impl NfaBuilder { + fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result { + match rule { + Rule::Pattern(s) => { + let ast = parse::Parser::new() + .parse(&s) + .map_err(|e| Error::GrammarError(e.to_string()))?; + self.expand_regex(&ast, next_state_id) } - Ok(s.len() > 0) - } - Rule::Choice(elements) => { - let mut alternative_state_ids = Vec::new(); - for element in elements { - if expand_rule(element, nfa, next_state_id, is_sep)? { - alternative_state_ids.push(nfa.last_state_id()); - } else { - alternative_state_ids.push(next_state_id); + Rule::String(s) => { + for c in s.chars().rev() { + self.push_advance(CharacterSet::empty().add_char(c), self.nfa.last_state_id()); } + Ok(s.len() > 0) } - alternative_state_ids.retain(|i| *i != nfa.last_state_id()); - for alternative_state_id in alternative_state_ids { - nfa.prepend(|last_state_id| NfaState::Split(last_state_id, alternative_state_id)); - } - Ok(true) - } - Rule::Seq(elements) => { - let mut result = false; - for element in elements.into_iter().rev() { - if expand_rule(element, nfa, next_state_id, is_sep)? { - result = true; + Rule::Choice(elements) => { + let mut alternative_state_ids = Vec::new(); + for element in elements { + if self.expand_rule(element, next_state_id)? 
{ + alternative_state_ids.push(self.nfa.last_state_id()); + } else { + alternative_state_ids.push(next_state_id); + } } - next_state_id = nfa.last_state_id(); - } - Ok(result) - } - Rule::Repeat(rule) => { - nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_state_id = nfa.last_state_id(); - if expand_rule(rule, nfa, split_state_id, is_sep)? { - nfa.states[split_state_id as usize] = - NfaState::Split(nfa.last_state_id(), next_state_id); - Ok(true) - } else { - Ok(false) - } - } - Rule::Metadata { rule, .. } => { - // TODO - implement precedence - expand_rule(rule, nfa, next_state_id, is_sep) - } - Rule::Blank => Ok(false), - _ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))), - } -} - -fn expand_one_or_more(ast: &Ast, nfa: &mut Nfa, next_state_id: u32, is_sep: bool) -> Result { - nfa.states.push(NfaState::Accept(0)); // Placeholder for split - let split_state_id = nfa.last_state_id(); - if expand_regex(&ast, nfa, split_state_id, is_sep)? { - nfa.states[split_state_id as usize] = NfaState::Split(nfa.last_state_id(), next_state_id); - Ok(true) - } else { - nfa.states.pop(); - Ok(false) - } -} - -fn expand_zero_or_one(ast: &Ast, nfa: &mut Nfa, next_state_id: u32, is_sep: bool) -> Result { - if expand_regex(ast, nfa, next_state_id, is_sep)? { - nfa.prepend(|last_state_id| NfaState::Split(next_state_id, last_state_id)); - Ok(true) - } else { - Ok(false) - } -} - -fn expand_zero_or_more(ast: &Ast, nfa: &mut Nfa, next_state_id: u32, is_sep: bool) -> Result { - if expand_one_or_more(&ast, nfa, next_state_id, is_sep)? { - nfa.prepend(|last_state_id| NfaState::Split(last_state_id, next_state_id)); - Ok(true) - } else { - Ok(false) - } -} - -fn expand_count( - ast: &Ast, - count: u32, - nfa: &mut Nfa, - mut next_state_id: u32, - is_sep: bool, -) -> Result { - let mut result = false; - for _ in 0..count { - if expand_regex(ast, nfa, next_state_id, is_sep)? 
{ - result = true; - next_state_id = nfa.last_state_id(); - } - } - Ok(result) -} - -fn expand_regex(ast: &Ast, nfa: &mut Nfa, mut next_state_id: u32, is_sep: bool) -> Result { - match ast { - Ast::Empty(_) => Ok(false), - Ast::Flags(_) => Err(Error::regex("Flags are not supported")), - Ast::Literal(literal) => { - nfa.states.push(NfaState::Advance { - chars: CharacterSet::Include(vec![literal.c]), - state_id: next_state_id, - is_sep, - }); - Ok(true) - } - Ast::Dot(_) => { - nfa.states.push(NfaState::Advance { - chars: CharacterSet::Exclude(vec!['\n']), - state_id: next_state_id, - is_sep, - }); - Ok(true) - } - Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")), - Ast::Class(class) => match class { - Class::Unicode(_) => Err(Error::regex("Unicode character classes are not supported")), - Class::Perl(class) => { - nfa.states.push(NfaState::Advance { - chars: expand_perl_character_class(&class.kind), - state_id: next_state_id, - is_sep, - }); - Ok(true) - } - Class::Bracketed(class) => match &class.kind { - ClassSet::Item(item) => { - let character_set = expand_character_class(&item)?; - nfa.states.push(NfaState::Advance { - chars: character_set, - state_id: next_state_id, - is_sep, + alternative_state_ids.retain(|i| *i != self.nfa.last_state_id()); + for alternative_state_id in alternative_state_ids { + self.nfa.prepend(|last_state_id| { + NfaState::Split(last_state_id, alternative_state_id) }); - Ok(true) } - ClassSet::BinaryOp(_) => Err(Error::regex( - "Binary operators in character classes aren't supported", - )), - }, - }, - Ast::Repetition(repetition) => match repetition.op.kind { - RepetitionKind::ZeroOrOne => { - expand_zero_or_one(&repetition.ast, nfa, next_state_id, is_sep) + Ok(true) } - RepetitionKind::OneOrMore => { - expand_one_or_more(&repetition.ast, nfa, next_state_id, is_sep) + Rule::Seq(elements) => { + let mut result = false; + for element in elements.into_iter().rev() { + if self.expand_rule(element, next_state_id)? 
{ + result = true; + } + next_state_id = self.nfa.last_state_id(); + } + Ok(result) } - RepetitionKind::ZeroOrMore => { - expand_zero_or_more(&repetition.ast, nfa, next_state_id, is_sep) - } - RepetitionKind::Range(RepetitionRange::Exactly(count)) => { - expand_count(&repetition.ast, count, nfa, next_state_id, is_sep) - } - RepetitionKind::Range(RepetitionRange::AtLeast(min)) => { - if expand_zero_or_more(&repetition.ast, nfa, next_state_id, is_sep)? { - expand_count(&repetition.ast, min, nfa, next_state_id, is_sep) + Rule::Repeat(rule) => { + self.nfa.states.push(NfaState::Accept { + variable_index: 0, + precedence: 0, + }); // Placeholder for split + let split_state_id = self.nfa.last_state_id(); + if self.expand_rule(rule, split_state_id)? { + self.nfa.states[split_state_id as usize] = + NfaState::Split(self.nfa.last_state_id(), next_state_id); + Ok(true) } else { Ok(false) } } - RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => { - let mut result = expand_count(&repetition.ast, min, nfa, next_state_id, is_sep)?; - for _ in min..max { - if result { - next_state_id = nfa.last_state_id(); + Rule::Metadata { rule, params } => { + if let Some(precedence) = params.precedence { + self.precedence_stack.push(precedence); + } + let result = self.expand_rule(rule, next_state_id); + if params.precedence.is_some() { + self.precedence_stack.pop(); + } + result + } + Rule::Blank => Ok(false), + _ => Err(Error::grammar(&format!("Unexpected rule {:?}", rule))), + } + } + + fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result { + match ast { + Ast::Empty(_) => Ok(false), + Ast::Flags(_) => Err(Error::regex("Flags are not supported")), + Ast::Literal(literal) => { + self.push_advance(CharacterSet::Include(vec![literal.c]), next_state_id); + Ok(true) + } + Ast::Dot(_) => { + self.push_advance(CharacterSet::Exclude(vec!['\n']), next_state_id); + Ok(true) + } + Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")), + Ast::Class(class) 
=> match class { + Class::Unicode(_) => { + Err(Error::regex("Unicode character classes are not supported")) + } + Class::Perl(class) => { + self.push_advance(self.expand_perl_character_class(&class.kind), next_state_id); + Ok(true) + } + Class::Bracketed(class) => match &class.kind { + ClassSet::Item(item) => { + self.push_advance(self.expand_character_class(&item)?, next_state_id); + Ok(true) } - if expand_zero_or_one(&repetition.ast, nfa, next_state_id, is_sep)? { + ClassSet::BinaryOp(_) => Err(Error::regex( + "Binary operators in character classes aren't supported", + )), + }, + }, + Ast::Repetition(repetition) => match repetition.op.kind { + RepetitionKind::ZeroOrOne => { + self.expand_zero_or_one(&repetition.ast, next_state_id) + } + RepetitionKind::OneOrMore => { + self.expand_one_or_more(&repetition.ast, next_state_id) + } + RepetitionKind::ZeroOrMore => { + self.expand_zero_or_more(&repetition.ast, next_state_id) + } + RepetitionKind::Range(RepetitionRange::Exactly(count)) => { + self.expand_count(&repetition.ast, count, next_state_id) + } + RepetitionKind::Range(RepetitionRange::AtLeast(min)) => { + if self.expand_zero_or_more(&repetition.ast, next_state_id)? { + self.expand_count(&repetition.ast, min, next_state_id) + } else { + Ok(false) + } + } + RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => { + let mut result = self.expand_count(&repetition.ast, min, next_state_id)?; + for _ in min..max { + if result { + next_state_id = self.nfa.last_state_id(); + } + if self.expand_zero_or_one(&repetition.ast, next_state_id)? { + result = true; + } + } + Ok(result) + } + }, + Ast::Group(group) => self.expand_regex(&group.ast, self.nfa.last_state_id()), + Ast::Alternation(alternation) => { + let mut alternative_state_ids = Vec::new(); + for ast in alternation.asts.iter() { + if self.expand_regex(&ast, next_state_id)? 
{ + alternative_state_ids.push(self.nfa.last_state_id()); + } else { + alternative_state_ids.push(next_state_id); + } + } + alternative_state_ids.sort_unstable(); + alternative_state_ids.dedup(); + alternative_state_ids.retain(|i| *i != self.nfa.last_state_id()); + + for alternative_state_id in alternative_state_ids { + self.nfa.prepend(|last_state_id| { + NfaState::Split(last_state_id, alternative_state_id) + }); + } + Ok(true) + } + Ast::Concat(concat) => { + let mut result = false; + for ast in concat.asts.iter().rev() { + if self.expand_regex(&ast, next_state_id)? { result = true; + next_state_id = self.nfa.last_state_id(); } } Ok(result) } - }, - Ast::Group(group) => expand_regex(&group.ast, nfa, nfa.last_state_id(), is_sep), - Ast::Alternation(alternation) => { - let mut alternative_state_ids = Vec::new(); - for ast in alternation.asts.iter() { - if expand_regex(&ast, nfa, next_state_id, is_sep)? { - alternative_state_ids.push(nfa.last_state_id()); - } else { - alternative_state_ids.push(next_state_id); - } - } - alternative_state_ids.retain(|i| *i != nfa.last_state_id()); - for alternative_state_id in alternative_state_ids { - nfa.prepend(|last_state_id| NfaState::Split(last_state_id, alternative_state_id)); - } + } + } + + fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { + self.nfa.states.push(NfaState::Accept { + variable_index: 0, + precedence: 0, + }); // Placeholder for split + let split_state_id = self.nfa.last_state_id(); + if self.expand_regex(&ast, split_state_id)? { + self.nfa.states[split_state_id as usize] = + NfaState::Split(self.nfa.last_state_id(), next_state_id); Ok(true) + } else { + self.nfa.states.pop(); + Ok(false) } - Ast::Concat(concat) => { - let mut result = false; - for ast in concat.asts.iter().rev() { - if expand_regex(&ast, nfa, next_state_id, is_sep)? 
{ - result = true; - next_state_id = nfa.last_state_id(); + } + + fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result { + if self.expand_regex(ast, next_state_id)? { + self.nfa + .prepend(|last_state_id| NfaState::Split(next_state_id, last_state_id)); + Ok(true) + } else { + Ok(false) + } + } + + fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { + if self.expand_one_or_more(&ast, next_state_id)? { + self.nfa + .prepend(|last_state_id| NfaState::Split(last_state_id, next_state_id)); + Ok(true) + } else { + Ok(false) + } + } + + fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result { + let mut result = false; + for _ in 0..count { + if self.expand_regex(ast, next_state_id)? { + result = true; + next_state_id = self.nfa.last_state_id(); + } + } + Ok(result) + } + + fn expand_character_class(&self, item: &ClassSetItem) -> Result { + match item { + ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), + ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), + ClassSetItem::Range(range) => { + Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)) + } + ClassSetItem::Union(union) => { + let mut result = CharacterSet::empty(); + for item in &union.items { + result = result.add(&self.expand_character_class(&item)?); } + Ok(result) } - Ok(result) + ClassSetItem::Perl(class) => Ok(self.expand_perl_character_class(&class.kind)), + _ => Err(Error::regex(&format!( + "Unsupported character class syntax {:?}", + item + ))), } } -} -fn expand_character_class(item: &ClassSetItem) -> Result { - match item { - ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), - ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), - ClassSetItem::Range(range) => { - Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)) + fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet { + match item { + 
ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'), + ClassPerlKind::Space => CharacterSet::empty() + .add_char(' ') + .add_char('\t') + .add_char('\r') + .add_char('\n'), + ClassPerlKind::Word => CharacterSet::empty() + .add_char('_') + .add_range('A', 'Z') + .add_range('a', 'z') + .add_range('0', '9'), } - ClassSetItem::Union(union) => { - let mut result = CharacterSet::empty(); - for item in &union.items { - result = result.add(expand_character_class(&item)?); - } - Ok(result) - } - ClassSetItem::Perl(class) => Ok(expand_perl_character_class(&class.kind)), - _ => Err(Error::regex(&format!( - "Unsupported character class syntax {:?}", - item - ))), } -} -fn expand_perl_character_class(item: &ClassPerlKind) -> CharacterSet { - match item { - ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'), - ClassPerlKind::Space => CharacterSet::empty() - .add_char(' ') - .add_char('\t') - .add_char('\r') - .add_char('\n'), - ClassPerlKind::Word => CharacterSet::empty() - .add_char('_') - .add_range('A', 'Z') - .add_range('a', 'z') - .add_range('0', '9'), + fn push_advance(&mut self, chars: CharacterSet, state_id: u32) { + let precedence = *self.precedence_stack.last().unwrap(); + self.add_precedence(precedence, vec![state_id]); + self.nfa.states.push(NfaState::Advance { + chars, + state_id, + precedence, + is_sep: self.is_sep, + }); + } + + fn add_precedence(&mut self, prec: i32, mut state_ids: Vec) { + let mut i = 0; + while i < state_ids.len() { + let state_id = state_ids[i]; + let (left, right) = match &mut self.nfa.states[state_id as usize] { + NfaState::Accept {precedence, ..} => { + *precedence = prec; + return; + }, + NfaState::Split(left, right) => (*left, *right), + _ => return + }; + if !state_ids.contains(&left) { + state_ids.push(left); + } + if !state_ids.contains(&right) { + state_ids.push(right); + } + i += 1; + } } } @@ -313,11 +367,15 @@ mod tests { let mut cursor = NfaCursor::new(&grammar.nfa, start_states); let mut result = 
None; + let mut result_precedence = 0; let mut start_char = 0; let mut end_char = 0; for c in s.chars() { - if let Some(id) = cursor.finished_id() { - result = Some((id, &s[start_char..end_char])); + if let Some((id, finished_precedence)) = cursor.finished_id() { + if result.is_none() || result_precedence <= finished_precedence { + result = Some((id, &s[start_char..end_char])); + result_precedence = finished_precedence; + } } if cursor.advance(c) { end_char += 1; @@ -329,8 +387,11 @@ mod tests { } } - if let Some(id) = cursor.finished_id() { - result = Some((id, &s[start_char..end_char])); + if let Some((id, finished_precedence)) = cursor.finished_id() { + if result.is_none() || result_precedence <= finished_precedence { + result = Some((id, &s[start_char..end_char])); + result_precedence = finished_precedence; + } } result @@ -443,6 +504,20 @@ mod tests { (" \\\na", Some((0, "a"))), ], }, + // shorter tokens with higher precedence + Row { + rules: vec![ + Rule::prec(2, Rule::pattern("abc")), + Rule::prec(1, Rule::pattern("ab[cd]e")), + Rule::pattern("[a-e]+"), + ], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], + examples: vec![ + ("abceef", Some((0, "abc"))), + ("abdeef", Some((1, "abde"))), + ("aeeeef", Some((2, "aeeee"))), + ], + }, ]; for Row { diff --git a/src/prepare_grammar/mod.rs b/src/prepare_grammar/mod.rs index f325383b..b0c1d2a3 100644 --- a/src/prepare_grammar/mod.rs +++ b/src/prepare_grammar/mod.rs @@ -7,7 +7,7 @@ mod intern_symbols; mod process_inlines; use self::expand_repeats::expand_repeats; -use self::expand_tokens::expand_tokens; +pub(crate) use self::expand_tokens::expand_tokens; use self::extract_simple_aliases::extract_simple_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; @@ -19,7 +19,7 @@ use crate::grammars::{ }; use crate::rules::{AliasMap, Rule, Symbol}; -pub(self) struct IntermediateGrammar { +pub(crate) struct IntermediateGrammar { variables: Vec, extra_tokens: Vec, 
expected_conflicts: Vec>, @@ -28,14 +28,14 @@ pub(self) struct IntermediateGrammar { word_token: Option, } -pub(self) type InternedGrammar = IntermediateGrammar; +pub(crate) type InternedGrammar = IntermediateGrammar; -pub(self) type ExtractedSyntaxGrammar = IntermediateGrammar; +pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar; #[derive(Debug, PartialEq, Eq)] -pub(self) struct ExtractedLexicalGrammar { - variables: Vec, - separators: Vec, +pub(crate) struct ExtractedLexicalGrammar { + pub variables: Vec, + pub separators: Vec, } pub(crate) fn prepare_grammar( From 605b50e58bf03661774ce7eb18f3b98dbd767ce3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 29 Dec 2018 13:57:34 -0800 Subject: [PATCH 078/208] Start work on shrinking parse table --- src/build_tables/build_parse_table.rs | 605 ++++++++++++++++++++++++ src/build_tables/mod.rs | 630 +------------------------ src/build_tables/shrink_parse_table.rs | 117 +++++ src/build_tables/token_conflict_map.rs | 77 +++ src/tables.rs | 56 ++- 5 files changed, 866 insertions(+), 619 deletions(-) create mode 100644 src/build_tables/build_parse_table.rs create mode 100644 src/build_tables/shrink_parse_table.rs create mode 100644 src/build_tables/token_conflict_map.rs diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs new file mode 100644 index 00000000..5087c55c --- /dev/null +++ b/src/build_tables/build_parse_table.rs @@ -0,0 +1,605 @@ +use super::item::{LookaheadSet, ParseItem, ParseItemSet}; +use super::item_set_builder::ParseItemSetBuilder; +use crate::error::{Error, Result}; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::rules::{Alias, AliasMap, Associativity, Symbol, SymbolType}; +use crate::tables::{ + AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +}; +use core::ops::Range; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet, VecDeque}; 
+use std::fmt::Write; + +#[derive(Clone)] +struct AuxiliarySymbolInfo { + auxiliary_symbol: Symbol, + parent_symbols: Vec, +} + +type SymbolSequence = Vec; +type AuxiliarySymbolSequence = Vec; + +struct ParseStateQueueEntry { + preceding_symbols: SymbolSequence, + preceding_auxiliary_symbols: AuxiliarySymbolSequence, + state_id: ParseStateId, +} + +struct ParseTableBuilder<'a> { + item_set_builder: ParseItemSetBuilder<'a>, + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, + state_ids_by_item_set: HashMap, ParseStateId>, + item_sets_by_state_id: Vec>, + parse_state_queue: VecDeque, + parse_table: ParseTable, +} + +impl<'a> ParseTableBuilder<'a> { + fn build(mut self) -> Result { + // Ensure that the empty alias sequence has index 0. + self.parse_table.alias_sequences.push(Vec::new()); + + // Ensure that the error state has index 0. + let error_state_id = + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); + + self.add_parse_state( + &Vec::new(), + &Vec::new(), + ParseItemSet::with( + [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))] + .iter() + .cloned(), + ), + ); + + self.process_part_state_queue()?; + self.populate_used_symbols(); + Ok(self.parse_table) + } + + fn add_parse_state( + &mut self, + preceding_symbols: &SymbolSequence, + preceding_auxiliary_symbols: &AuxiliarySymbolSequence, + item_set: ParseItemSet<'a>, + ) -> ParseStateId { + match self.state_ids_by_item_set.entry(item_set) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let state_id = self.parse_table.states.len(); + self.item_sets_by_state_id.push(v.key().clone()); + self.parse_table.states.push(ParseState { + lex_state_id: 0, + terminal_entries: HashMap::new(), + nonterminal_entries: HashMap::new(), + }); + self.parse_state_queue.push_back(ParseStateQueueEntry { + state_id, + preceding_symbols: preceding_symbols.clone(), + preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(), 
+ }); + v.insert(state_id); + state_id + } + } + } + + fn process_part_state_queue(&mut self) -> Result<()> { + while let Some(entry) = self.parse_state_queue.pop_front() { + let debug = false; + + if debug { + println!( + "ITEM SET {}:\n{}", + entry.state_id, + self.item_sets_by_state_id[entry.state_id] + .display_with(&self.syntax_grammar, &self.lexical_grammar,) + ); + } + + let item_set = self.item_set_builder.transitive_closure( + &self.item_sets_by_state_id[entry.state_id], + self.syntax_grammar, + self.inlines, + ); + + if debug { + println!( + "TRANSITIVE CLOSURE:\n{}", + item_set.display_with(&self.syntax_grammar, &self.lexical_grammar) + ); + } + + self.add_actions( + entry.preceding_symbols, + entry.preceding_auxiliary_symbols, + item_set, + entry.state_id, + )?; + } + Ok(()) + } + + fn add_actions( + &mut self, + mut preceding_symbols: SymbolSequence, + mut preceding_auxiliary_symbols: Vec, + item_set: ParseItemSet<'a>, + state_id: ParseStateId, + ) -> Result<()> { + let mut terminal_successors = HashMap::new(); + let mut non_terminal_successors = HashMap::new(); + let mut lookaheads_with_conflicts = HashSet::new(); + + for (item, lookaheads) in &item_set.entries { + if let Some(next_symbol) = item.symbol() { + let successor = item.successor(); + if next_symbol.is_non_terminal() { + // Keep track of where auxiliary non-terminals (repeat symbols) are + // used within visible symbols. This information may be needed later + // for conflict resolution. 
+ if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() { + preceding_auxiliary_symbols + .push(self.get_auxiliary_node_info(&item_set, next_symbol)); + } + + non_terminal_successors + .entry(next_symbol) + .or_insert_with(|| ParseItemSet::default()) + .entries + .entry(successor) + .or_insert_with(|| LookaheadSet::new()) + .insert_all(lookaheads); + } else { + terminal_successors + .entry(next_symbol) + .or_insert_with(|| ParseItemSet::default()) + .entries + .entry(successor) + .or_insert_with(|| LookaheadSet::new()) + .insert_all(lookaheads); + } + } else { + let action = if item.is_augmented() { + ParseAction::Accept + } else { + ParseAction::Reduce { + symbol: Symbol::non_terminal(item.variable_index as usize), + child_count: item.step_index as usize, + precedence: item.precedence(), + associativity: item.associativity(), + dynamic_precedence: item.production.dynamic_precedence, + alias_sequence_id: self.get_alias_sequence_id(item), + } + }; + + for lookahead in lookaheads.iter() { + let entry = self.parse_table.states[state_id] + .terminal_entries + .entry(lookahead); + let entry = entry.or_insert_with(|| ParseTableEntry::new()); + if entry.actions.is_empty() { + entry.actions.push(action); + } else if action.precedence() > entry.actions[0].precedence() { + entry.actions.clear(); + entry.actions.push(action); + lookaheads_with_conflicts.remove(&lookahead); + } else if action.precedence() == entry.actions[0].precedence() { + entry.actions.push(action); + lookaheads_with_conflicts.insert(lookahead); + } + } + } + } + + for (symbol, next_item_set) in terminal_successors { + preceding_symbols.push(symbol); + let next_state_id = self.add_parse_state( + &preceding_symbols, + &preceding_auxiliary_symbols, + next_item_set, + ); + preceding_symbols.pop(); + + let entry = self.parse_table.states[state_id] + .terminal_entries + .entry(symbol); + if let Entry::Occupied(e) = &entry { + if !e.get().actions.is_empty() { + 
lookaheads_with_conflicts.insert(symbol); + } + } + + entry + .or_insert_with(|| ParseTableEntry::new()) + .actions + .push(ParseAction::Shift { + state: next_state_id, + is_repetition: false, + }); + } + + for (symbol, next_item_set) in non_terminal_successors { + preceding_symbols.push(symbol); + let next_state_id = self.add_parse_state( + &preceding_symbols, + &preceding_auxiliary_symbols, + next_item_set, + ); + preceding_symbols.pop(); + self.parse_table.states[state_id] + .nonterminal_entries + .insert(symbol, next_state_id); + } + + for symbol in lookaheads_with_conflicts { + self.handle_conflict( + &item_set, + state_id, + &preceding_symbols, + &preceding_auxiliary_symbols, + symbol, + )?; + } + + let state = &mut self.parse_table.states[state_id]; + for extra_token in &self.syntax_grammar.extra_tokens { + state + .terminal_entries + .entry(*extra_token) + .or_insert(ParseTableEntry { + reusable: true, + actions: vec![ParseAction::ShiftExtra], + }); + } + + Ok(()) + } + + fn handle_conflict( + &mut self, + item_set: &ParseItemSet, + state_id: ParseStateId, + preceding_symbols: &SymbolSequence, + preceding_auxiliary_symbols: &Vec, + conflicting_lookahead: Symbol, + ) -> Result<()> { + let entry = self.parse_table.states[state_id] + .terminal_entries + .get_mut(&conflicting_lookahead) + .unwrap(); + + // Determine which items in the set conflict with each other, and the + // precedences associated with SHIFT vs REDUCE actions. There won't + // be multiple REDUCE actions with different precedences; that is + // sorted out ahead of time in `add_actions`. But there can still be + // REDUCE-REDUCE conflicts where all actions have the *same* + // precedence, and there can still be SHIFT/REDUCE conflicts. 
+ let reduce_precedence = entry.actions[0].precedence(); + let mut considered_associativity = false; + let mut shift_precedence: Option> = None; + let mut conflicting_items = HashSet::new(); + for (item, lookaheads) in &item_set.entries { + if let Some(step) = item.step() { + if item.step_index > 0 { + if self + .item_set_builder + .first_set(&step.symbol) + .contains(&conflicting_lookahead) + { + conflicting_items.insert(item); + let precedence = item.precedence(); + if let Some(range) = &mut shift_precedence { + if precedence < range.start { + range.start = precedence; + } else if precedence > range.end { + range.end = precedence; + } + } else { + shift_precedence = Some(precedence..precedence); + } + } + } + } else if lookaheads.contains(&conflicting_lookahead) { + conflicting_items.insert(item); + } + } + + if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() { + let shift_precedence = shift_precedence.unwrap_or(0..0); + + // If all of the items in the conflict have the same parent symbol, + // and that parent symbols is auxiliary, then this is just the intentional + // ambiguity associated with a repeat rule. Resolve that class of ambiguity + // by leaving it in the parse table, but marking the SHIFT action with + // an `is_repetition` flag. + let conflicting_variable_index = + conflicting_items.iter().next().unwrap().variable_index; + if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() { + if conflicting_items + .iter() + .all(|item| item.variable_index == conflicting_variable_index) + { + *is_repetition = true; + return Ok(()); + } + } + + // If the SHIFT action has higher precedence, remove all the REDUCE actions. + if shift_precedence.start > reduce_precedence + || (shift_precedence.start == reduce_precedence + && shift_precedence.end > reduce_precedence) + { + entry.actions.drain(0..entry.actions.len() - 1); + } + // If the REDUCE actions have higher precedence, remove the SHIFT action. 
+ else if shift_precedence.end < reduce_precedence + || (shift_precedence.end == reduce_precedence + && shift_precedence.start < reduce_precedence) + { + entry.actions.pop(); + conflicting_items.retain(|item| item.is_done()); + } + // If the SHIFT and REDUCE actions have the same predence, consider + // the REDUCE actions' associativity. + else if shift_precedence == (reduce_precedence..reduce_precedence) { + considered_associativity = true; + let mut has_left = false; + let mut has_right = false; + let mut has_non = false; + for action in &entry.actions { + if let ParseAction::Reduce { associativity, .. } = action { + match associativity { + Some(Associativity::Left) => has_left = true, + Some(Associativity::Right) => has_right = true, + None => has_non = true, + } + } + } + + // If all reduce actions are left associative, remove the SHIFT action. + // If all reduce actions are right associative, remove the REDUCE actions. + match (has_left, has_non, has_right) { + (true, false, false) => { + entry.actions.pop(); + conflicting_items.retain(|item| item.is_done()); + } + (false, false, true) => { + entry.actions.drain(0..entry.actions.len() - 1); + } + _ => {} + } + } + } + + // If all of the actions but one have been eliminated, then there's no problem. + let entry = self.parse_table.states[state_id] + .terminal_entries + .get_mut(&conflicting_lookahead) + .unwrap(); + if entry.actions.len() == 1 { + return Ok(()); + } + + // Determine the set of parent symbols involved in this conflict. 
+ let mut actual_conflict = Vec::new(); + for item in &conflicting_items { + let symbol = Symbol::non_terminal(item.variable_index as usize); + if self.syntax_grammar.variables[symbol.index].is_auxiliary() { + actual_conflict.extend( + preceding_auxiliary_symbols + .iter() + .rev() + .find_map(|info| { + if info.auxiliary_symbol == symbol { + Some(&info.parent_symbols) + } else { + None + } + }) + .unwrap() + .iter(), + ); + } else { + actual_conflict.push(symbol); + } + } + actual_conflict.sort_unstable(); + actual_conflict.dedup(); + + // If this set of symbols has been whitelisted, then there's no error. + if self + .syntax_grammar + .expected_conflicts + .contains(&actual_conflict) + { + return Ok(()); + } + + let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string(); + for symbol in preceding_symbols { + write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap(); + } + + write!( + &mut msg, + " • {} …\n\n", + self.symbol_name(&conflicting_lookahead) + ) + .unwrap(); + write!(&mut msg, "Possible interpretations:\n").unwrap(); + for (i, item) in conflicting_items.iter().enumerate() { + write!(&mut msg, "\n {}:", i).unwrap(); + + for preceding_symbol in preceding_symbols + .iter() + .take(preceding_symbols.len() - item.step_index as usize) + { + write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap(); + } + + write!( + &mut msg, + " ({}", + &self.syntax_grammar.variables[item.variable_index as usize].name + ) + .unwrap(); + + for (j, step) in item.production.steps.iter().enumerate() { + if j as u32 == item.step_index { + write!(&mut msg, " •").unwrap(); + } + write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap(); + } + + write!(&mut msg, ")").unwrap(); + + if item.is_done() { + write!( + &mut msg, + " • {}", + self.symbol_name(&conflicting_lookahead) + ) + .unwrap(); + } + + let precedence = item.precedence(); + let associativity = item.associativity(); + if precedence != 0 || associativity.is_some() { + write!( + &mut msg, 
+ "(precedence: {}, associativity: {:?})", + precedence, associativity + ) + .unwrap(); + } + } + + // TODO - generate suggested resolutions + + Err(Error::ConflictError(msg)) + } + + fn get_auxiliary_node_info( + &self, + item_set: &ParseItemSet, + symbol: Symbol, + ) -> AuxiliarySymbolInfo { + let parent_symbols = item_set + .entries + .keys() + .filter_map(|item| { + if item.symbol() == Some(symbol) { + None + } else { + None + } + }) + .collect(); + AuxiliarySymbolInfo { + auxiliary_symbol: symbol, + parent_symbols, + } + } + + fn populate_used_symbols(&mut self) { + let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()]; + let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()]; + let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()]; + for state in &self.parse_table.states { + for symbol in state.terminal_entries.keys() { + match symbol.kind { + SymbolType::Terminal => terminal_usages[symbol.index] = true, + SymbolType::External => external_usages[symbol.index] = true, + _ => {} + } + } + for symbol in state.nonterminal_entries.keys() { + non_terminal_usages[symbol.index] = true; + } + } + self.parse_table.symbols.push(Symbol::end()); + for (i, value) in terminal_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::terminal(i)); + } + } + for (i, value) in non_terminal_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::non_terminal(i)); + } + } + for (i, value) in external_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::external(i)); + } + } + } + + fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { + let mut alias_sequence: Vec> = item + .production + .steps + .iter() + .map(|s| s.alias.clone()) + .collect(); + while alias_sequence.last() == Some(&None) { + alias_sequence.pop(); + } + if let Some(index) = self + .parse_table + .alias_sequences + .iter() + 
.position(|seq| *seq == alias_sequence) + { + index + } else { + self.parse_table.alias_sequences.push(alias_sequence); + self.parse_table.alias_sequences.len() - 1 + } + } + + fn symbol_name(&self, symbol: &Symbol) -> String { + match symbol.kind { + SymbolType::End => "EOF".to_string(), + SymbolType::External => self.syntax_grammar.external_tokens[symbol.index] + .name + .clone(), + SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(), + SymbolType::Terminal => { + let variable = &self.lexical_grammar.variables[symbol.index]; + if variable.kind == VariableType::Named { + variable.name.clone() + } else { + format!("\"{}\"", &variable.name) + } + } + } + } +} + +pub(crate) fn build_parse_table( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + inlines: &InlinedProductionMap, +) -> Result { + ParseTableBuilder { + syntax_grammar, + lexical_grammar, + inlines, + item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), + state_ids_by_item_set: HashMap::new(), + item_sets_by_state_id: Vec::new(), + parse_state_queue: VecDeque::new(), + parse_table: ParseTable { + states: Vec::new(), + alias_sequences: Vec::new(), + symbols: Vec::new(), + }, + } + .build() +} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index fc17ce7f..a5ac74fb 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,607 +1,17 @@ +use crate::error::Result; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; +use crate::rules::{AliasMap, Symbol}; +use crate::tables::{LexTable, ParseTable}; + +mod build_parse_table; mod item; mod item_set_builder; mod lex_table_builder; +mod shrink_parse_table; +mod token_conflict_map; -use self::item::{LookaheadSet, ParseItem, ParseItemSet}; -use self::item_set_builder::ParseItemSetBuilder; -use self::lex_table_builder::LexTableBuilder; -use crate::error::{Error, Result}; -use crate::grammars::{InlinedProductionMap, LexicalGrammar, 
SyntaxGrammar, VariableType}; -use crate::rules::Alias; -use crate::rules::{AliasMap, Associativity, Symbol, SymbolType}; -use crate::tables::{ - AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, -}; -use core::ops::Range; -use std::collections::hash_map::Entry; -use std::collections::{HashMap, HashSet, VecDeque}; -use std::fmt::Write; - -#[derive(Clone)] -struct AuxiliarySymbolInfo { - auxiliary_symbol: Symbol, - parent_symbols: Vec, -} - -type SymbolSequence = Vec; -type AuxiliarySymbolSequence = Vec; - -struct ParseStateQueueEntry { - preceding_symbols: SymbolSequence, - preceding_auxiliary_symbols: AuxiliarySymbolSequence, - state_id: ParseStateId, -} - -struct ParseTableBuilder<'a> { - item_set_builder: ParseItemSetBuilder<'a>, - syntax_grammar: &'a SyntaxGrammar, - lexical_grammar: &'a LexicalGrammar, - inlines: &'a InlinedProductionMap, - simple_aliases: &'a AliasMap, - state_ids_by_item_set: HashMap, ParseStateId>, - item_sets_by_state_id: Vec>, - parse_state_queue: VecDeque, - parse_table: ParseTable, -} - -impl<'a> ParseTableBuilder<'a> { - fn build(mut self) -> Result<(ParseTable, LexTable, LexTable, Option)> { - // Ensure that the empty alias sequence has index 0. - self.parse_table.alias_sequences.push(Vec::new()); - - // Ensure that the error state has index 0. 
- let error_state_id = - self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); - - self.add_parse_state( - &Vec::new(), - &Vec::new(), - ParseItemSet::with( - [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))] - .iter() - .cloned(), - ), - ); - - self.process_part_state_queue()?; - - let lex_table_builder = LexTableBuilder::new(self.syntax_grammar, self.lexical_grammar); - - self.populate_used_symbols(); - - let (main_lex_table, keyword_lex_table, keyword_capture_token) = lex_table_builder.build(); - Ok(( - self.parse_table, - main_lex_table, - keyword_lex_table, - keyword_capture_token, - )) - } - - fn add_parse_state( - &mut self, - preceding_symbols: &SymbolSequence, - preceding_auxiliary_symbols: &AuxiliarySymbolSequence, - item_set: ParseItemSet<'a>, - ) -> ParseStateId { - match self.state_ids_by_item_set.entry(item_set) { - Entry::Occupied(o) => { - // eprintln!("Item set already processed at state {}", *o.get()); - *o.get() - } - Entry::Vacant(v) => { - // eprintln!("Item set not yet processed"); - let state_id = self.parse_table.states.len(); - self.item_sets_by_state_id.push(v.key().clone()); - self.parse_table.states.push(ParseState { - lex_state_id: 0, - terminal_entries: HashMap::new(), - nonterminal_entries: HashMap::new(), - }); - self.parse_state_queue.push_back(ParseStateQueueEntry { - state_id, - preceding_symbols: preceding_symbols.clone(), - preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(), - }); - v.insert(state_id); - state_id - } - } - } - - fn process_part_state_queue(&mut self) -> Result<()> { - while let Some(entry) = self.parse_state_queue.pop_front() { - let debug = false; - - if debug { - println!( - "ITEM SET {}:\n{}", - entry.state_id, - self.item_sets_by_state_id[entry.state_id] - .display_with(&self.syntax_grammar, &self.lexical_grammar,) - ); - } - - let item_set = self.item_set_builder.transitive_closure( - &self.item_sets_by_state_id[entry.state_id], - self.syntax_grammar, - 
self.inlines, - ); - - if debug { - println!( - "TRANSITIVE CLOSURE:\n{}", - item_set.display_with(&self.syntax_grammar, &self.lexical_grammar) - ); - } - - self.add_actions( - entry.preceding_symbols, - entry.preceding_auxiliary_symbols, - item_set, - entry.state_id, - )?; - } - Ok(()) - } - - fn add_actions( - &mut self, - mut preceding_symbols: SymbolSequence, - mut preceding_auxiliary_symbols: Vec, - item_set: ParseItemSet<'a>, - state_id: ParseStateId, - ) -> Result<()> { - let mut terminal_successors = HashMap::new(); - let mut non_terminal_successors = HashMap::new(); - let mut lookaheads_with_conflicts = HashSet::new(); - - for (item, lookaheads) in &item_set.entries { - if let Some(next_symbol) = item.symbol() { - let successor = item.successor(); - if next_symbol.is_non_terminal() { - // Keep track of where auxiliary non-terminals (repeat symbols) are - // used within visible symbols. This information may be needed later - // for conflict resolution. - if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() { - preceding_auxiliary_symbols - .push(self.get_auxiliary_node_info(&item_set, next_symbol)); - } - - non_terminal_successors - .entry(next_symbol) - .or_insert_with(|| ParseItemSet::default()) - .entries - .entry(successor) - .or_insert_with(|| LookaheadSet::new()) - .insert_all(lookaheads); - } else { - terminal_successors - .entry(next_symbol) - .or_insert_with(|| ParseItemSet::default()) - .entries - .entry(successor) - .or_insert_with(|| LookaheadSet::new()) - .insert_all(lookaheads); - } - } else { - let action = if item.is_augmented() { - ParseAction::Accept - } else { - ParseAction::Reduce { - symbol: Symbol::non_terminal(item.variable_index as usize), - child_count: item.step_index as usize, - precedence: item.precedence(), - associativity: item.associativity(), - dynamic_precedence: item.production.dynamic_precedence, - alias_sequence_id: self.get_alias_sequence_id(item), - } - }; - - for lookahead in lookaheads.iter() { - let 
entry = self.parse_table.states[state_id] - .terminal_entries - .entry(lookahead); - let entry = entry.or_insert_with(|| ParseTableEntry::new()); - if entry.actions.is_empty() { - entry.actions.push(action); - } else if action.precedence() > entry.actions[0].precedence() { - entry.actions.clear(); - entry.actions.push(action); - lookaheads_with_conflicts.remove(&lookahead); - } else if action.precedence() == entry.actions[0].precedence() { - entry.actions.push(action); - lookaheads_with_conflicts.insert(lookahead); - } - } - } - } - - for (symbol, next_item_set) in terminal_successors { - preceding_symbols.push(symbol); - let next_state_id = self.add_parse_state( - &preceding_symbols, - &preceding_auxiliary_symbols, - next_item_set, - ); - preceding_symbols.pop(); - - let entry = self.parse_table.states[state_id] - .terminal_entries - .entry(symbol); - if let Entry::Occupied(e) = &entry { - if !e.get().actions.is_empty() { - lookaheads_with_conflicts.insert(symbol); - } - } - - entry - .or_insert_with(|| ParseTableEntry::new()) - .actions - .push(ParseAction::Shift { - state: next_state_id, - is_repetition: false, - }); - } - - for (symbol, next_item_set) in non_terminal_successors { - preceding_symbols.push(symbol); - let next_state_id = self.add_parse_state( - &preceding_symbols, - &preceding_auxiliary_symbols, - next_item_set, - ); - preceding_symbols.pop(); - self.parse_table.states[state_id] - .nonterminal_entries - .insert(symbol, next_state_id); - } - - for symbol in lookaheads_with_conflicts { - self.handle_conflict( - &item_set, - state_id, - &preceding_symbols, - &preceding_auxiliary_symbols, - symbol, - )?; - } - - let state = &mut self.parse_table.states[state_id]; - for extra_token in &self.syntax_grammar.extra_tokens { - state - .terminal_entries - .entry(*extra_token) - .or_insert(ParseTableEntry { - reusable: true, - actions: vec![ParseAction::ShiftExtra], - }); - } - - Ok(()) - } - - fn handle_conflict( - &mut self, - item_set: &ParseItemSet, - 
state_id: ParseStateId, - preceding_symbols: &SymbolSequence, - preceding_auxiliary_symbols: &Vec, - conflicting_lookahead: Symbol, - ) -> Result<()> { - let entry = self.parse_table.states[state_id] - .terminal_entries - .get_mut(&conflicting_lookahead) - .unwrap(); - - // Determine which items in the set conflict with each other, and the - // precedences associated with SHIFT vs REDUCE actions. There won't - // be multiple REDUCE actions with different precedences; that is - // sorted out ahead of time in `add_actions`. But there can still be - // REDUCE-REDUCE conflicts where all actions have the *same* - // precedence, and there can still be SHIFT/REDUCE conflicts. - let reduce_precedence = entry.actions[0].precedence(); - let mut considered_associativity = false; - let mut shift_precedence: Option> = None; - let mut conflicting_items = HashSet::new(); - for (item, lookaheads) in &item_set.entries { - if let Some(step) = item.step() { - if item.step_index > 0 { - if self - .item_set_builder - .first_set(&step.symbol) - .contains(&conflicting_lookahead) - { - conflicting_items.insert(item); - let precedence = item.precedence(); - if let Some(range) = &mut shift_precedence { - if precedence < range.start { - range.start = precedence; - } else if precedence > range.end { - range.end = precedence; - } - } else { - shift_precedence = Some(precedence..precedence); - } - } - } - } else if lookaheads.contains(&conflicting_lookahead) { - conflicting_items.insert(item); - } - } - - if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() { - let shift_precedence = shift_precedence.unwrap_or(0..0); - - // If all of the items in the conflict have the same parent symbol, - // and that parent symbols is auxiliary, then this is just the intentional - // ambiguity associated with a repeat rule. Resolve that class of ambiguity - // by leaving it in the parse table, but marking the SHIFT action with - // an `is_repetition` flag. 
- let conflicting_variable_index = - conflicting_items.iter().next().unwrap().variable_index; - if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() { - if conflicting_items - .iter() - .all(|item| item.variable_index == conflicting_variable_index) - { - *is_repetition = true; - return Ok(()); - } - } - - // If the SHIFT action has higher precedence, remove all the REDUCE actions. - if shift_precedence.start > reduce_precedence - || (shift_precedence.start == reduce_precedence - && shift_precedence.end > reduce_precedence) - { - entry.actions.drain(0..entry.actions.len() - 1); - } - // If the REDUCE actions have higher precedence, remove the SHIFT action. - else if shift_precedence.end < reduce_precedence - || (shift_precedence.end == reduce_precedence - && shift_precedence.start < reduce_precedence) - { - entry.actions.pop(); - conflicting_items.retain(|item| item.is_done()); - } - // If the SHIFT and REDUCE actions have the same predence, consider - // the REDUCE actions' associativity. - else if shift_precedence == (reduce_precedence..reduce_precedence) { - considered_associativity = true; - let mut has_left = false; - let mut has_right = false; - let mut has_non = false; - for action in &entry.actions { - if let ParseAction::Reduce { associativity, .. } = action { - match associativity { - Some(Associativity::Left) => has_left = true, - Some(Associativity::Right) => has_right = true, - None => has_non = true, - } - } - } - - // If all reduce actions are left associative, remove the SHIFT action. - // If all reduce actions are right associative, remove the REDUCE actions. - match (has_left, has_non, has_right) { - (true, false, false) => { - entry.actions.pop(); - conflicting_items.retain(|item| item.is_done()); - } - (false, false, true) => { - entry.actions.drain(0..entry.actions.len() - 1); - } - _ => {} - } - } - } - - // If all of the actions but one have been eliminated, then there's no problem. 
- let entry = self.parse_table.states[state_id] - .terminal_entries - .get_mut(&conflicting_lookahead) - .unwrap(); - if entry.actions.len() == 1 { - return Ok(()); - } - - // Determine the set of parent symbols involved in this conflict. - let mut actual_conflict = Vec::new(); - for item in &conflicting_items { - let symbol = Symbol::non_terminal(item.variable_index as usize); - if self.syntax_grammar.variables[symbol.index].is_auxiliary() { - actual_conflict.extend( - preceding_auxiliary_symbols - .iter() - .rev() - .find_map(|info| { - if info.auxiliary_symbol == symbol { - Some(&info.parent_symbols) - } else { - None - } - }) - .unwrap() - .iter(), - ); - } else { - actual_conflict.push(symbol); - } - } - actual_conflict.sort_unstable(); - actual_conflict.dedup(); - - // If this set of symbols has been whitelisted, then there's no error. - if self - .syntax_grammar - .expected_conflicts - .contains(&actual_conflict) - { - return Ok(()); - } - - let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string(); - for symbol in preceding_symbols { - write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap(); - } - - write!( - &mut msg, - " • {} …\n\n", - self.symbol_name(&conflicting_lookahead) - ) - .unwrap(); - write!(&mut msg, "Possible interpretations:\n").unwrap(); - for (i, item) in conflicting_items.iter().enumerate() { - write!(&mut msg, "\n {}:", i).unwrap(); - - for preceding_symbol in preceding_symbols - .iter() - .take(preceding_symbols.len() - item.step_index as usize) - { - write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap(); - } - - write!( - &mut msg, - " ({}", - &self.syntax_grammar.variables[item.variable_index as usize].name - ) - .unwrap(); - - for (j, step) in item.production.steps.iter().enumerate() { - if j as u32 == item.step_index { - write!(&mut msg, " •").unwrap(); - } - write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap(); - } - - write!(&mut msg, ")").unwrap(); - - if item.is_done() { - write!( - 
&mut msg, - " • {}", - self.symbol_name(&conflicting_lookahead) - ) - .unwrap(); - } - - let precedence = item.precedence(); - let associativity = item.associativity(); - if precedence != 0 || associativity.is_some() { - write!( - &mut msg, - "(precedence: {}, associativity: {:?})", - precedence, associativity - ) - .unwrap(); - } - } - - // TODO - generate suggested resolutions - - Err(Error::ConflictError(msg)) - } - - fn get_auxiliary_node_info( - &self, - item_set: &ParseItemSet, - symbol: Symbol, - ) -> AuxiliarySymbolInfo { - let parent_symbols = item_set - .entries - .keys() - .filter_map(|item| { - if item.symbol() == Some(symbol) { - None - } else { - None - } - }) - .collect(); - AuxiliarySymbolInfo { - auxiliary_symbol: symbol, - parent_symbols, - } - } - - fn populate_used_symbols(&mut self) { - let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()]; - let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()]; - let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()]; - for state in &self.parse_table.states { - for symbol in state.terminal_entries.keys() { - match symbol.kind { - SymbolType::Terminal => terminal_usages[symbol.index] = true, - SymbolType::External => external_usages[symbol.index] = true, - _ => {} - } - } - for symbol in state.nonterminal_entries.keys() { - non_terminal_usages[symbol.index] = true; - } - } - self.parse_table.symbols.push(Symbol::end()); - for (i, value) in terminal_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::terminal(i)); - } - } - for (i, value) in non_terminal_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::non_terminal(i)); - } - } - for (i, value) in external_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::external(i)); - } - } - } - - fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { - let mut alias_sequence: 
Vec> = item - .production - .steps - .iter() - .map(|s| s.alias.clone()) - .collect(); - while alias_sequence.last() == Some(&None) { - alias_sequence.pop(); - } - if let Some(index) = self - .parse_table - .alias_sequences - .iter() - .position(|seq| *seq == alias_sequence) - { - index - } else { - self.parse_table.alias_sequences.push(alias_sequence); - self.parse_table.alias_sequences.len() - 1 - } - } - - fn symbol_name(&self, symbol: &Symbol) -> String { - match symbol.kind { - SymbolType::End => "EOF".to_string(), - SymbolType::External => self.syntax_grammar.external_tokens[symbol.index] - .name - .clone(), - SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(), - SymbolType::Terminal => { - let variable = &self.lexical_grammar.variables[symbol.index]; - if variable.kind == VariableType::Named { - variable.name.clone() - } else { - format!("\"{}\"", &variable.name) - } - } - } - } -} +use self::build_parse_table::build_parse_table; +use self::shrink_parse_table::shrink_parse_table; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, @@ -609,20 +19,8 @@ pub(crate) fn build_tables( simple_aliases: &AliasMap, inlines: &InlinedProductionMap, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { - ParseTableBuilder { - syntax_grammar, - lexical_grammar, - simple_aliases, - inlines, - item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), - state_ids_by_item_set: HashMap::new(), - item_sets_by_state_id: Vec::new(), - parse_state_queue: VecDeque::new(), - parse_table: ParseTable { - states: Vec::new(), - alias_sequences: Vec::new(), - symbols: Vec::new(), - }, - } - .build() + + let mut parse_table = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; + shrink_parse_table(&mut parse_table, syntax_grammar, simple_aliases); + Ok((parse_table, LexTable::default(), LexTable::default(), None)) } diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/shrink_parse_table.rs 
new file mode 100644 index 00000000..8e826f5c --- /dev/null +++ b/src/build_tables/shrink_parse_table.rs @@ -0,0 +1,117 @@ +use crate::grammars::{SyntaxGrammar, VariableType}; +use crate::rules::AliasMap; +use crate::tables::{ParseAction, ParseTable}; +use std::collections::{HashMap, HashSet}; + +pub(crate) fn shrink_parse_table( + parse_table: &mut ParseTable, + syntax_grammar: &SyntaxGrammar, + simple_aliases: &AliasMap, +) { + remove_unit_reductions(parse_table, syntax_grammar, simple_aliases); + remove_unused_states(parse_table); +} + +fn remove_unit_reductions( + parse_table: &mut ParseTable, + syntax_grammar: &SyntaxGrammar, + simple_aliases: &AliasMap, +) { + let mut aliased_symbols = HashSet::new(); + for variable in &syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if step.alias.is_some() { + aliased_symbols.insert(step.symbol); + } + } + } + } + + let mut unit_reduction_symbols_by_state = HashMap::new(); + for (i, state) in parse_table.states.iter().enumerate() { + let mut only_unit_reductions = true; + let mut unit_reduction_symbol = None; + for (_, entry) in &state.terminal_entries { + for action in &entry.actions { + match action { + ParseAction::ShiftExtra => continue, + ParseAction::Reduce { + child_count: 1, + alias_sequence_id: 0, + symbol, + .. 
+ } => { + if !simple_aliases.contains_key(&symbol) + && !aliased_symbols.contains(&symbol) + && syntax_grammar.variables[symbol.index].kind != VariableType::Named + && (unit_reduction_symbol.is_none() + || unit_reduction_symbol == Some(symbol)) + { + unit_reduction_symbol = Some(symbol); + continue; + } + } + _ => {} + } + only_unit_reductions = false; + break; + } + + if !only_unit_reductions { + break; + } + } + + if let Some(symbol) = unit_reduction_symbol { + if only_unit_reductions { + unit_reduction_symbols_by_state.insert(i, *symbol); + } + } + } + + for state in parse_table.states.iter_mut() { + let mut done = false; + while !done { + done = true; + state.update_referenced_states(|other_state_id, state| { + if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { + done = false; + state.nonterminal_entries[symbol] + } else { + other_state_id + } + }) + } + } +} + +fn remove_unused_states(parse_table: &mut ParseTable) { + let mut state_usage_map = vec![false; parse_table.states.len()]; + for state in &parse_table.states { + for referenced_state in state.referenced_states() { + state_usage_map[referenced_state] = true; + } + } + let mut removed_predecessor_count = 0; + let mut state_replacement_map = vec![0; parse_table.states.len()]; + for state_id in 0..parse_table.states.len() { + state_replacement_map[state_id] = state_id - removed_predecessor_count; + if !state_usage_map[state_id] { + removed_predecessor_count += 1; + } + } + let mut state_id = 0; + let mut original_state_id = 0; + while state_id < parse_table.states.len() { + if state_usage_map[original_state_id] { + parse_table.states[state_id].update_referenced_states(|other_state_id, _| { + state_replacement_map[other_state_id] + }); + state_id += 1; + } else { + parse_table.states.remove(state_id); + } + original_state_id += 1; + } +} diff --git a/src/build_tables/token_conflict_map.rs b/src/build_tables/token_conflict_map.rs new file mode 100644 index 00000000..46a00986 --- 
/dev/null +++ b/src/build_tables/token_conflict_map.rs @@ -0,0 +1,77 @@ +use crate::grammars::{LexicalGrammar, LexicalVariable}; +use crate::nfa::{CharacterSet, NfaCursor}; +use std::collections::HashSet; + +#[derive(Default)] +struct TokenConflictStatus { + matches_same_string: bool, + matches_longer_string_with_valid_next_char: bool, +} + +pub(crate) struct TokenConflictMap { + starting_chars_by_index: Vec, + status_matrix: Vec, +} + +impl TokenConflictMap { + pub fn new(grammar: &LexicalGrammar) -> Self { + let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); + + let mut starting_chars_by_index = Vec::with_capacity(grammar.variables.len()); + for variable in &grammar.variables { + cursor.reset(vec![variable.start_state]); + let mut all_chars = CharacterSet::empty(); + for (chars, _, _) in cursor.successors() { + all_chars = all_chars.add(chars); + } + starting_chars_by_index.push(all_chars); + } + + let status_matrix = + Vec::with_capacity(grammar.variables.len() * grammar.variables.len()); + + TokenConflictMap { + starting_chars_by_index, + status_matrix, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammars::{Variable, VariableType}; + use crate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; + use crate::rules::Rule; + + #[test] + fn test_starting_characters() { + let grammar = expand_tokens(ExtractedLexicalGrammar { + separators: Vec::new(), + variables: vec![ + Variable { + name: "token_0".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("[a-f]1|0x\\d"), + }, + Variable { + name: "token_1".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("d*ef"), + }, + ], + }) + .unwrap(); + + let token_map = TokenConflictMap::new(&grammar); + + assert_eq!( + token_map.starting_chars_by_index[0], + CharacterSet::empty().add_range('a', 'f').add_char('0') + ); + assert_eq!( + token_map.starting_chars_by_index[1], + CharacterSet::empty().add_range('d', 'e') + ); + } +} diff --git a/src/tables.rs 
b/src/tables.rs index 01cecb49..0815aac8 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1,7 +1,7 @@ +use crate::nfa::CharacterSet; +use crate::rules::{Alias, Associativity, Symbol}; use std::collections::HashMap; use std::ops::Range; -use crate::rules::{Associativity, Symbol, Alias}; -use crate::nfa::CharacterSet; pub(crate) type AliasSequenceId = usize; pub(crate) type ParseStateId = usize; @@ -23,7 +23,7 @@ pub(crate) enum ParseAction { dynamic_precedence: i32, associativity: Option, alias_sequence_id: AliasSequenceId, - } + }, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -86,6 +86,56 @@ impl Default for LexTable { } } +impl ParseState { + pub fn referenced_states<'a>(&'a self) -> impl Iterator + 'a { + self.terminal_entries + .iter() + .flat_map(|(_, entry)| { + entry.actions.iter().filter_map(|action| match action { + ParseAction::Shift { state, .. } => Some(*state), + _ => None, + }) + }) + .chain(self.nonterminal_entries.iter().map(|(_, state)| *state)) + } + + pub fn update_referenced_states(&mut self, mut f: F) + where + F: FnMut(usize, &ParseState) -> usize, + { + let mut updates = Vec::new(); + for (symbol, entry) in &self.terminal_entries { + for (i, action) in entry.actions.iter().enumerate() { + if let ParseAction::Shift { state, .. } = action { + let result = f(*state, self); + if result != *state { + updates.push((*symbol, i, result)); + } + } + } + } + for (symbol, other_state) in &self.nonterminal_entries { + let result = f(*other_state, self); + if result != *other_state { + updates.push((*symbol, 0, result)); + } + } + for (symbol, action_index, new_state) in updates { + if symbol.is_non_terminal() { + self.nonterminal_entries.insert(symbol, new_state); + } else { + let entry = self.terminal_entries.get_mut(&symbol).unwrap(); + if let ParseAction::Shift { is_repetition, .. 
} = entry.actions[action_index] { + entry.actions[action_index] = ParseAction::Shift { + state: new_state, + is_repetition, + }; + } + } + } + } +} + impl ParseAction { pub fn precedence(&self) -> i32 { if let ParseAction::Reduce { precedence, .. } = self { From c6b9e97c5820bd2f24c42e58fd2e82944354a6b6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 30 Dec 2018 19:31:17 -0800 Subject: [PATCH 079/208] Implement token conflict map --- src/build_tables/build_parse_table.rs | 20 +- src/build_tables/item_set_builder.rs | 4 + src/build_tables/mod.rs | 6 +- src/build_tables/token_conflict_map.rs | 315 +++++++++++++++++- src/grammars.rs | 7 + src/nfa.rs | 156 ++++++--- src/prepare_grammar/expand_tokens.rs | 40 ++- src/prepare_grammar/extract_simple_aliases.rs | 3 + 8 files changed, 471 insertions(+), 80 deletions(-) diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 5087c55c..a7911689 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -2,7 +2,7 @@ use super::item::{LookaheadSet, ParseItem, ParseItemSet}; use super::item_set_builder::ParseItemSetBuilder; use crate::error::{Error, Result}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; -use crate::rules::{Alias, AliasMap, Associativity, Symbol, SymbolType}; +use crate::rules::{Alias, Associativity, Symbol, SymbolType}; use crate::tables::{ AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; @@ -35,10 +35,11 @@ struct ParseTableBuilder<'a> { item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, parse_table: ParseTable, + following_tokens: Vec, } impl<'a> ParseTableBuilder<'a> { - fn build(mut self) -> Result { + fn build(mut self) -> Result<(ParseTable, Vec)> { // Ensure that the empty alias sequence has index 0. 
self.parse_table.alias_sequences.push(Vec::new()); @@ -58,7 +59,7 @@ impl<'a> ParseTableBuilder<'a> { self.process_part_state_queue()?; self.populate_used_symbols(); - Ok(self.parse_table) + Ok((self.parse_table, self.following_tokens)) } fn add_parse_state( @@ -67,6 +68,16 @@ impl<'a> ParseTableBuilder<'a> { preceding_auxiliary_symbols: &AuxiliarySymbolSequence, item_set: ParseItemSet<'a>, ) -> ParseStateId { + if preceding_symbols.len() > 1 { + let left_tokens = self.item_set_builder.last_set(&preceding_symbols[preceding_symbols.len() - 2]); + let right_tokens = self.item_set_builder.first_set(&preceding_symbols[preceding_symbols.len() - 1]); + for left_token in left_tokens.iter() { + if left_token.is_terminal() { + self.following_tokens[left_token.index].insert_all(right_tokens); + } + } + } + match self.state_ids_by_item_set.entry(item_set) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { @@ -586,7 +597,7 @@ pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, inlines: &InlinedProductionMap, -) -> Result { +) -> Result<(ParseTable, Vec)> { ParseTableBuilder { syntax_grammar, lexical_grammar, @@ -600,6 +611,7 @@ pub(crate) fn build_parse_table( alias_sequences: Vec::new(), symbols: Vec::new(), }, + following_tokens: vec![LookaheadSet::new(); lexical_grammar.variables.len()], } .build() } diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index d7883988..8649cb52 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -269,6 +269,10 @@ impl<'a> ParseItemSetBuilder<'a> { &self.first_sets[symbol] } + pub fn last_set(&self, symbol: &Symbol) -> &LookaheadSet { + &self.first_sets[symbol] + } + fn add_item( &self, set: &mut ParseItemSet<'a>, diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index a5ac74fb..d1983068 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -12,6 +12,7 @@ mod token_conflict_map; use 
self::build_parse_table::build_parse_table; use self::shrink_parse_table::shrink_parse_table; +use self::token_conflict_map::TokenConflictMap; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, @@ -19,8 +20,9 @@ pub(crate) fn build_tables( simple_aliases: &AliasMap, inlines: &InlinedProductionMap, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { - - let mut parse_table = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; + let (mut parse_table, following_tokens) = + build_parse_table(syntax_grammar, lexical_grammar, inlines)?; + let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); shrink_parse_table(&mut parse_table, syntax_grammar, simple_aliases); Ok((parse_table, LexTable::default(), LexTable::default(), None)) } diff --git a/src/build_tables/token_conflict_map.rs b/src/build_tables/token_conflict_map.rs index 46a00986..52c68cc7 100644 --- a/src/build_tables/token_conflict_map.rs +++ b/src/build_tables/token_conflict_map.rs @@ -1,40 +1,262 @@ -use crate::grammars::{LexicalGrammar, LexicalVariable}; +use crate::build_tables::item::LookaheadSet; +use crate::grammars::LexicalGrammar; use crate::nfa::{CharacterSet, NfaCursor}; use std::collections::HashSet; +use std::fmt; -#[derive(Default)] +#[derive(Clone, Debug, Default)] struct TokenConflictStatus { + does_overlap: bool, + does_match_valid_continuation: bool, matches_same_string: bool, - matches_longer_string_with_valid_next_char: bool, } pub(crate) struct TokenConflictMap { - starting_chars_by_index: Vec, + n: usize, status_matrix: Vec, + starting_chars_by_index: Vec, + following_chars_by_index: Vec, } impl TokenConflictMap { - pub fn new(grammar: &LexicalGrammar) -> Self { + pub fn new(grammar: &LexicalGrammar, following_tokens: Vec) -> Self { let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); + let starting_chars = get_starting_chars(&mut cursor, grammar); + let following_chars = get_following_chars(&starting_chars, following_tokens); - let mut 
starting_chars_by_index = Vec::with_capacity(grammar.variables.len()); - for variable in &grammar.variables { - cursor.reset(vec![variable.start_state]); - let mut all_chars = CharacterSet::empty(); - for (chars, _, _) in cursor.successors() { - all_chars = all_chars.add(chars); + let n = grammar.variables.len(); + let mut status_matrix = vec![TokenConflictStatus::default(); n * n]; + for i in 0..grammar.variables.len() { + for j in 0..i { + let status = compute_conflict_status(&mut cursor, grammar, &following_chars, i, j); + status_matrix[matrix_index(n, i, j)] = status.0; + status_matrix[matrix_index(n, j, i)] = status.1; } - starting_chars_by_index.push(all_chars); } - let status_matrix = - Vec::with_capacity(grammar.variables.len() * grammar.variables.len()); - TokenConflictMap { - starting_chars_by_index, + n, status_matrix, + starting_chars_by_index: starting_chars, + following_chars_by_index: following_chars, } } + + pub fn does_match_same_string(&self, i: usize, j: usize) -> bool { + self.status_matrix[matrix_index(self.n, i, j)].matches_same_string + } + + pub fn does_match_valid_continuation(&self, i: usize, j: usize) -> bool { + self.status_matrix[matrix_index(self.n, i, j)].does_match_valid_continuation + } + + pub fn does_overlap(&self, i: usize, j: usize) -> bool { + self.status_matrix[matrix_index(self.n, i, j)].does_overlap + } +} + +impl fmt::Debug for TokenConflictMap { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "TokenConflictMap {{\n")?; + + write!(f, " starting_characters: {{\n")?; + for i in 0..self.n { + write!(f, " {}: {:?},\n", i, self.starting_chars_by_index[i])?; + } + write!(f, " }},\n")?; + + write!(f, " following_characters: {{\n")?; + for i in 0..self.n { + write!(f, " {}: {:?},\n", i, self.following_chars_by_index[i])?; + } + write!(f, " }},\n")?; + + write!(f, " status_matrix: {{\n")?; + for i in 0..self.n { + write!(f, " {}: {{\n", i)?; + for j in 0..self.n { + write!( + f, + " {}: {:?},\n", + j, + 
self.status_matrix[matrix_index(self.n, i, j)] + )?; + } + write!(f, " }},\n")?; + } + write!(f, " }},")?; + write!(f, "}}")?; + Ok(()) + } +} + +fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize { + variable_count * i + j +} + +fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec { + let mut result = Vec::with_capacity(grammar.variables.len()); + for variable in &grammar.variables { + cursor.reset(vec![variable.start_state]); + let mut all_chars = CharacterSet::empty(); + for (chars, _, _) in cursor.successors() { + all_chars = all_chars.add(chars); + } + result.push(all_chars); + } + result +} + +fn get_following_chars( + starting_chars: &Vec, + following_tokens: Vec, +) -> Vec { + following_tokens + .into_iter() + .map(|following_tokens| { + let mut chars = CharacterSet::empty(); + for token in following_tokens.iter() { + if token.is_terminal() { + chars = chars.add(&starting_chars[token.index]); + } + } + chars + }) + .collect() +} + +fn compute_conflict_status( + cursor: &mut NfaCursor, + grammar: &LexicalGrammar, + following_chars: &Vec, + i: usize, + j: usize, +) -> (TokenConflictStatus, TokenConflictStatus) { + let mut visited_state_sets = HashSet::new(); + let mut state_set_queue = vec![vec![ + grammar.variables[i].start_state, + grammar.variables[j].start_state, + ]]; + let mut result = ( + TokenConflictStatus::default(), + TokenConflictStatus::default(), + ); + + while let Some(state_set) = state_set_queue.pop() { + // Don't pursue states where there's no potential for conflict. + if variable_ids_for_states(&state_set, grammar).count() > 1 { + cursor.reset(state_set); + } else { + continue; + } + + let mut completion = None; + for (id, precedence) in cursor.completions() { + if let Some((prev_id, prev_precedence)) = completion { + if id == prev_id { + continue; + } + + // Prefer tokens with higher precedence. For tokens with equal precedence, + // prefer those listed earlier in the grammar. 
+ let winning_id; + if prefer_token(grammar, (prev_precedence, prev_id), (precedence, id)) { + winning_id = prev_id; + } else { + winning_id = id; + completion = Some((id, precedence)); + } + + if winning_id == i { + result.0.matches_same_string = true; + result.0.does_overlap = true; + } else { + result.1.matches_same_string = true; + result.1.does_overlap = true; + } + } else { + completion = Some((id, precedence)); + } + } + + for (chars, advance_precedence, next_states) in cursor.grouped_successors() { + let mut can_advance = true; + if let Some((completed_id, completed_precedence)) = completion { + let mut other_id = None; + let mut successor_contains_completed_id = false; + for variable_id in variable_ids_for_states(&next_states, grammar) { + if variable_id == completed_id { + successor_contains_completed_id = true; + break; + } else { + other_id = Some(variable_id); + } + } + + if let (Some(other_id), false) = (other_id, successor_contains_completed_id) { + let winning_id; + if advance_precedence < completed_precedence { + winning_id = completed_id; + can_advance = false; + } else { + winning_id = other_id; + } + + if winning_id == i { + result.0.does_overlap = true; + if chars.does_intersect(&following_chars[j]) { + result.0.does_match_valid_continuation = true; + } + } else { + result.1.does_overlap = true; + if chars.does_intersect(&following_chars[i]) { + result.1.does_match_valid_continuation = true; + } + } + } + } + + if can_advance && visited_state_sets.insert(next_states.clone()) { + state_set_queue.push(next_states); + } + } + } + result +} + +fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool { + if left.0 > right.0 { + return true; + } else if left.0 < right.0 { + return false; + } + + match ( + grammar.variables[left.1].is_string, + grammar.variables[right.1].is_string, + ) { + (true, false) => return true, + (false, true) => return false, + _ => {} + } + + left.0 < right.0 +} + +fn 
variable_ids_for_states<'a>( + state_ids: &'a Vec, + grammar: &'a LexicalGrammar, +) -> impl Iterator + 'a { + let mut prev = None; + state_ids.iter().filter_map(move |state_id| { + let variable_id = grammar.variable_index_for_nfa_state(*state_id); + if prev != Some(variable_id) { + prev = Some(variable_id); + prev + } else { + None + } + }) } #[cfg(test)] @@ -42,7 +264,7 @@ mod tests { use super::*; use crate::grammars::{Variable, VariableType}; use crate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; - use crate::rules::Rule; + use crate::rules::{Rule, Symbol}; #[test] fn test_starting_characters() { @@ -63,7 +285,7 @@ mod tests { }) .unwrap(); - let token_map = TokenConflictMap::new(&grammar); + let token_map = TokenConflictMap::new(&grammar, Vec::new()); assert_eq!( token_map.starting_chars_by_index[0], @@ -74,4 +296,61 @@ mod tests { CharacterSet::empty().add_range('d', 'e') ); } + + #[test] + fn test_token_conflicts() { + let grammar = expand_tokens(ExtractedLexicalGrammar { + separators: Vec::new(), + variables: vec![ + Variable { + name: "in".to_string(), + kind: VariableType::Named, + rule: Rule::string("in"), + }, + Variable { + name: "identifier".to_string(), + kind: VariableType::Named, + rule: Rule::pattern("\\w+"), + }, + Variable { + name: "instanceof".to_string(), + kind: VariableType::Named, + rule: Rule::string("instanceof"), + }, + ], + }) + .unwrap(); + + let var = |name| index_of_var(&grammar, name); + + let token_map = TokenConflictMap::new( + &grammar, + vec![ + LookaheadSet::with(&[Symbol::terminal(var("identifier"))]), + LookaheadSet::with(&[Symbol::terminal(var("in"))]), + LookaheadSet::with(&[Symbol::terminal(var("identifier"))]), + ], + ); + + // Given the string "in", the `in` token is preferred over the `identifier` token + assert!(token_map.does_match_same_string(var("in"), var("identifier"))); + assert!(!token_map.does_match_same_string(var("identifier"), var("in"))); + + // Depending on what character follows, the 
string "in" may be treated as part of an + // `identifier` token. + assert!(token_map.does_match_valid_continuation(var("identifier"), var("in"))); + + // Depending on what character follows, the string "instanceof" may be treated as part of + // an `identifier` token. + assert!(token_map.does_match_valid_continuation(var("identifier"), var("instanceof"))); + assert!(token_map.does_match_valid_continuation(var("instanceof"), var("in"))); + } + + fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize { + grammar + .variables + .iter() + .position(|v| v.name == name) + .unwrap() + } } diff --git a/src/grammars.rs b/src/grammars.rs index b751e4e4..18da86d8 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -36,6 +36,7 @@ pub(crate) struct InputGrammar { pub(crate) struct LexicalVariable { pub name: String, pub kind: VariableType, + pub is_string: bool, pub start_state: u32, } @@ -179,6 +180,12 @@ impl Variable { } } +impl LexicalGrammar { + pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize { + self.variables.iter().position(|v| v.start_state >= state_id).unwrap() + } +} + impl SyntaxVariable { pub fn is_auxiliary(&self) -> bool { self.kind == VariableType::Auxiliary diff --git a/src/nfa.rs b/src/nfa.rs index 4a4fa17b..738d1b40 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -97,6 +97,19 @@ impl CharacterSet { panic!("Called add with a negated character set"); } + pub fn does_intersect(&self, other: &CharacterSet) -> bool { + match self { + CharacterSet::Include(chars) => match other { + CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).common, + CharacterSet::Exclude(other_chars) => compare_chars(chars, other_chars).left_only, + }, + CharacterSet::Exclude(chars) => match other { + CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).right_only, + CharacterSet::Exclude(_) => true, + }, + } + } + pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet { match self { 
CharacterSet::Include(chars) => match other { @@ -152,14 +165,14 @@ impl Ord for CharacterSet { match self { CharacterSet::Include(chars) => { if let CharacterSet::Include(other_chars) = other { - compare_chars(chars, other_chars) + order_chars(chars, other_chars) } else { Ordering::Less } } CharacterSet::Exclude(chars) => { if let CharacterSet::Exclude(other_chars) = other { - compare_chars(chars, other_chars) + order_chars(chars, other_chars) } else { Ordering::Greater } @@ -197,7 +210,39 @@ fn remove_chars(left: &mut Vec, right: &mut Vec, mutate_right: bool) result } -fn compare_chars(chars: &Vec, other_chars: &Vec) -> Ordering { +struct SetComparision { + left_only: bool, + common: bool, + right_only: bool, +} + +fn compare_chars(left: &Vec, right: &Vec) -> SetComparision { + let mut result = SetComparision { + left_only: false, + common: false, + right_only: false, + }; + let mut left = left.iter().cloned(); + let mut right = right.iter().cloned(); + let mut i = left.next(); + let mut j = right.next(); + while let (Some(left_char), Some(right_char)) = (i, j) { + if left_char < right_char { + i = left.next(); + result.left_only = true; + } else if left_char > right_char { + j = right.next(); + result.right_only = true; + } else { + i = left.next(); + j = right.next(); + result.common = true; + } + } + result +} + +fn order_chars(chars: &Vec, other_chars: &Vec) -> Ordering { if chars.is_empty() { if other_chars.is_empty() { Ordering::Equal @@ -207,19 +252,15 @@ fn compare_chars(chars: &Vec, other_chars: &Vec) -> Ordering { } else if other_chars.is_empty() { Ordering::Greater } else { - let mut other_c = other_chars.iter(); - for c in chars.iter() { - if let Some(other_c) = other_c.next() { - let cmp = c.cmp(other_c); - if cmp != Ordering::Equal { - return cmp; - } - } else { - return Ordering::Greater; - } + let cmp = chars.len().cmp(&other_chars.len()); + if cmp != Ordering::Equal { + return cmp; } - if other_c.next().is_some() { - return Ordering::Less; + for 
(c, other_c) in chars.iter().zip(other_chars.iter()) { + let cmp = c.cmp(other_c); + if cmp != Ordering::Equal { + return cmp; + } } Ordering::Equal } @@ -233,10 +274,6 @@ impl Nfa { pub fn last_state_id(&self) -> u32 { self.states.len() as u32 - 1 } - - pub fn prepend(&mut self, f: impl Fn(u32) -> NfaState) { - self.states.push(f(self.last_state_id())); - } } impl fmt::Debug for Nfa { @@ -325,11 +362,17 @@ impl<'a> NfaCursor<'a> { while i < result.len() { let intersection = result[i].0.remove_intersection(&mut chars); if !intersection.is_empty() { - let mut states = result[i].2.clone(); - let mut precedence = result[i].1; - states.push(state); - result.insert(i, (intersection, max(precedence, prec), states)); - i += 1; + if result[i].0.is_empty() { + result[i].0 = intersection; + result[i].1 = max(result[i].1, prec); + result[i].2.push(state); + } else { + let mut states = result[i].2.clone(); + let mut precedence = result[i].1; + states.push(state); + result.insert(i, (intersection, max(precedence, prec), states)); + i += 1; + } } i += 1; } @@ -341,27 +384,18 @@ impl<'a> NfaCursor<'a> { result } - pub fn finished_id(&self) -> Option<(usize, i32)> { - let mut result = None; - for state_id in self.state_ids.iter() { + pub fn completions(&self) -> impl Iterator + '_ { + self.state_ids.iter().filter_map(move |state_id| { if let NfaState::Accept { variable_index, precedence, } = self.nfa.states[*state_id as usize] { - match result { - None => result = Some((variable_index, precedence)), - Some((existing_id, existing_precedence)) => { - if precedence > existing_precedence - || (precedence == existing_precedence && variable_index < existing_id) - { - result = Some((variable_index, precedence)) - } - } - } + Some((variable_index, precedence)) + } else { + None } - } - result + }) } pub fn in_separator(&self) -> bool { @@ -467,7 +501,7 @@ mod tests { } #[test] - fn test_character_set_intersection() { + fn test_character_set_remove_intersection() { // whitelist - whitelist 
// both sets contain 'c', 'd', and 'f' let mut a = CharacterSet::empty().add_range('a', 'f'); @@ -529,4 +563,46 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); } + + #[test] + fn test_character_set_does_intersect() { + let (a, b) = (CharacterSet::empty(), CharacterSet::empty()); + assert!(!a.does_intersect(&b)); + assert!(!b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::empty().add_char('a'), + CharacterSet::empty().add_char('a'), + ); + assert!(a.does_intersect(&b)); + assert!(b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::empty().add_char('b'), + CharacterSet::empty().add_char('a').add_char('c'), + ); + assert!(!a.does_intersect(&b)); + assert!(!b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::Include(vec!['b']), + CharacterSet::Exclude(vec!['a', 'b', 'c']), + ); + assert!(!a.does_intersect(&b)); + assert!(!b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::Include(vec!['b']), + CharacterSet::Exclude(vec!['a', 'c']), + ); + assert!(a.does_intersect(&b)); + assert!(b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::Exclude(vec!['a']), + CharacterSet::Exclude(vec!['a']), + ); + assert!(a.does_intersect(&b)); + assert!(b.does_intersect(&a)); + } } diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index b0d2ae04..2b7e7b4d 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -13,6 +13,14 @@ struct NfaBuilder { precedence_stack: Vec, } +fn is_string(rule: &Rule) -> bool { + match rule { + Rule::String(_) => true, + Rule::Metadata { rule, .. } => is_string(rule), + _ => false + } +} + pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { let mut builder = NfaBuilder { nfa: Nfa::new(), @@ -58,6 +66,7 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result Result { if self.expand_regex(ast, next_state_id)? 
{ - self.nfa - .prepend(|last_state_id| NfaState::Split(next_state_id, last_state_id)); + self.push_split(next_state_id); Ok(true) } else { Ok(false) @@ -265,8 +269,7 @@ impl NfaBuilder { fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { if self.expand_one_or_more(&ast, next_state_id)? { - self.nfa - .prepend(|last_state_id| NfaState::Split(last_state_id, next_state_id)); + self.push_split(next_state_id); Ok(true) } else { Ok(false) @@ -333,6 +336,11 @@ impl NfaBuilder { }); } + fn push_split(&mut self, state_id: u32) { + let last_state_id = self.nfa.last_state_id(); + self.nfa.states.push(NfaState::Split(state_id, last_state_id)); + } + fn add_precedence(&mut self, prec: i32, mut state_ids: Vec) { let mut i = 0; while i < state_ids.len() { @@ -371,10 +379,10 @@ mod tests { let mut start_char = 0; let mut end_char = 0; for c in s.chars() { - if let Some((id, finished_precedence)) = cursor.finished_id() { - if result.is_none() || result_precedence <= finished_precedence { + for (id, precedence) in cursor.completions() { + if result.is_none() || result_precedence <= precedence { result = Some((id, &s[start_char..end_char])); - result_precedence = finished_precedence; + result_precedence = precedence; } } if cursor.advance(c) { @@ -387,10 +395,10 @@ mod tests { } } - if let Some((id, finished_precedence)) = cursor.finished_id() { - if result.is_none() || result_precedence <= finished_precedence { + for (id, precedence) in cursor.completions() { + if result.is_none() || result_precedence <= precedence { result = Some((id, &s[start_char..end_char])); - result_precedence = finished_precedence; + result_precedence = precedence; } } diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/src/prepare_grammar/extract_simple_aliases.rs index ff7204a0..ee748f5d 100644 --- a/src/prepare_grammar/extract_simple_aliases.rs +++ b/src/prepare_grammar/extract_simple_aliases.rs @@ -137,16 +137,19 @@ mod tests { LexicalVariable { name: "t1".to_string(), 
kind: VariableType::Anonymous, + is_string: true, start_state: 0, }, LexicalVariable { name: "t2".to_string(), kind: VariableType::Anonymous, + is_string: true, start_state: 0, }, LexicalVariable { name: "t3".to_string(), kind: VariableType::Anonymous, + is_string: true, start_state: 0, } ], From a46b8fcb46a1f8799bd50ebe7e04e7cddf4bff2d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 1 Jan 2019 13:47:29 -0800 Subject: [PATCH 080/208] Implement parse state merging --- src/build_tables/build_parse_table.rs | 13 +- src/build_tables/coincident_tokens.rs | 36 ++++ src/build_tables/item.rs | 32 +++- src/build_tables/mod.rs | 88 +++++++++- src/build_tables/shrink_parse_table.rs | 158 +++++++++++++++++- ...ken_conflict_map.rs => token_conflicts.rs} | 23 ++- src/nfa.rs | 41 +++-- src/prepare_grammar/expand_tokens.rs | 12 +- src/tables.rs | 1 + 9 files changed, 364 insertions(+), 40 deletions(-) create mode 100644 src/build_tables/coincident_tokens.rs rename src/build_tables/{token_conflict_map.rs => token_conflicts.rs} (92%) diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index a7911689..2fe6fd8d 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -7,7 +7,8 @@ use crate::tables::{ AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; -use std::collections::hash_map::Entry; +use std::hash::Hasher; +use std::collections::hash_map::{Entry, DefaultHasher}; use std::collections::{HashMap, HashSet, VecDeque}; use std::fmt::Write; @@ -44,14 +45,13 @@ impl<'a> ParseTableBuilder<'a> { self.parse_table.alias_sequences.push(Vec::new()); // Ensure that the error state has index 0. 
- let error_state_id = - self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); + self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); self.add_parse_state( &Vec::new(), &Vec::new(), ParseItemSet::with( - [(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))] + [(ParseItem::start(), LookaheadSet::with([Symbol::end()].iter().cloned()))] .iter() .cloned(), ), @@ -78,6 +78,10 @@ impl<'a> ParseTableBuilder<'a> { } } + let mut hasher = DefaultHasher::new(); + item_set.hash_unfinished_items(&mut hasher); + let unfinished_item_signature = hasher.finish(); + match self.state_ids_by_item_set.entry(item_set) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { @@ -87,6 +91,7 @@ impl<'a> ParseTableBuilder<'a> { lex_state_id: 0, terminal_entries: HashMap::new(), nonterminal_entries: HashMap::new(), + unfinished_item_signature, }); self.parse_state_queue.push_back(ParseStateQueueEntry { state_id, diff --git a/src/build_tables/coincident_tokens.rs b/src/build_tables/coincident_tokens.rs new file mode 100644 index 00000000..10707489 --- /dev/null +++ b/src/build_tables/coincident_tokens.rs @@ -0,0 +1,36 @@ +use crate::rules::Symbol; +use crate::tables::{ParseStateId, ParseTable}; +use std::collections::{HashMap, HashSet}; + +pub(crate) struct CoincidentTokenIndex { + entries: HashMap<(Symbol, Symbol), HashSet>, + empty: HashSet, +} + +impl CoincidentTokenIndex { + pub fn new(table: &ParseTable) -> Self { + let mut entries = HashMap::new(); + for (i, state) in table.states.iter().enumerate() { + for symbol in state.terminal_entries.keys() { + for other_symbol in state.terminal_entries.keys() { + entries + .entry((*symbol, *other_symbol)) + .or_insert(HashSet::new()) + .insert(i); + } + } + } + Self { + entries, + empty: HashSet::new(), + } + } + + pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet { + self.entries.get(&(a, b)).unwrap_or(&self.empty) + } + + pub fn contains(&self, a: Symbol, b: Symbol) -> bool { + 
self.entries.contains_key(&(a, b)) + } +} diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 28723d24..4cd2f643 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -2,11 +2,11 @@ use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar} use crate::rules::Associativity; use crate::rules::{Symbol, SymbolType}; use smallbitvec::SmallBitVec; +use std::cmp::Ordering; use std::collections::BTreeMap; use std::fmt; use std::hash::{Hash, Hasher}; use std::u32; -use std::cmp::Ordering; lazy_static! { static ref START_PRODUCTION: Production = Production { @@ -85,10 +85,10 @@ impl LookaheadSet { .chain(if self.eof { Some(Symbol::end()) } else { None }) } - pub fn with<'a>(symbols: impl IntoIterator) -> Self { + pub fn with(symbols: impl IntoIterator) -> Self { let mut result = Self::new(); for symbol in symbols { - result.insert(*symbol); + result.insert(symbol); } result } @@ -219,6 +219,21 @@ impl<'a> ParseItemSet<'a> { result } + pub fn hash_unfinished_items(&self, h: &mut impl Hasher) { + let mut previous_variable_index = u32::MAX; + let mut previous_step_index = u32::MAX; + for item in self.entries.keys() { + if item.step().is_none() && item.variable_index != previous_variable_index + || item.step_index != previous_step_index + { + h.write_u32(item.variable_index); + h.write_u32(item.step_index); + previous_variable_index = item.variable_index; + previous_step_index = item.step_index; + } + } + } + pub fn display_with( &'a self, syntax_grammar: &'a SyntaxGrammar, @@ -369,11 +384,18 @@ impl<'a> Ord for ParseItem<'a> { if o != Ordering::Equal { return o; } - let o = self.production.dynamic_precedence.cmp(&other.production.dynamic_precedence); + let o = self + .production + .dynamic_precedence + .cmp(&other.production.dynamic_precedence); if o != Ordering::Equal { return o; } - let o = self.production.steps.len().cmp(&other.production.steps.len()); + let o = self + .production + .steps + .len() + 
.cmp(&other.production.steps.len()); if o != Ordering::Equal { return o; } diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index d1983068..665c56a0 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -1,18 +1,20 @@ -use crate::error::Result; -use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::rules::{AliasMap, Symbol}; -use crate::tables::{LexTable, ParseTable}; - mod build_parse_table; +mod coincident_tokens; mod item; mod item_set_builder; mod lex_table_builder; mod shrink_parse_table; -mod token_conflict_map; +mod token_conflicts; use self::build_parse_table::build_parse_table; +use self::coincident_tokens::CoincidentTokenIndex; +use self::item::LookaheadSet; use self::shrink_parse_table::shrink_parse_table; -use self::token_conflict_map::TokenConflictMap; +use self::token_conflicts::TokenConflictMap; +use crate::error::Result; +use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; +use crate::rules::{AliasMap, Symbol}; +use crate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, @@ -23,6 +25,76 @@ pub(crate) fn build_tables( let (mut parse_table, following_tokens) = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); - shrink_parse_table(&mut parse_table, syntax_grammar, simple_aliases); + let coincident_token_index = CoincidentTokenIndex::new(&parse_table); + populate_error_state( + &mut parse_table, + syntax_grammar, + lexical_grammar, + &coincident_token_index, + &token_conflict_map, + ); + shrink_parse_table( + &mut parse_table, + syntax_grammar, + simple_aliases, + &token_conflict_map, + ); Ok((parse_table, LexTable::default(), LexTable::default(), None)) } + +fn populate_error_state( + parse_table: &mut ParseTable, + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + 
coincident_token_index: &CoincidentTokenIndex, + token_conflict_map: &TokenConflictMap, +) { + let state = &mut parse_table.states[0]; + let n = lexical_grammar.variables.len(); + let conflict_free_tokens = LookaheadSet::with((0..n).into_iter().filter_map(|i| { + let conflicts_with_other_tokens = (0..n).into_iter().all(|j| { + j == i + || coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) + || !token_conflict_map.does_conflict(i, j) + }); + if conflicts_with_other_tokens { + None + } else { + Some(Symbol::terminal(i)) + } + })); + + let recover_entry = ParseTableEntry { + reusable: false, + actions: vec![ParseAction::Recover], + }; + + for i in 0..n { + let symbol = Symbol::terminal(i); + let can_be_used_for_recovery = conflict_free_tokens.contains(&symbol) + || conflict_free_tokens.iter().all(|t| { + coincident_token_index.contains(symbol, t) + || !token_conflict_map.does_conflict(i, t.index) + }); + if can_be_used_for_recovery { + eprintln!("include {}", &lexical_grammar.variables[symbol.index].name); + state + .terminal_entries + .entry(symbol) + .or_insert_with(|| recover_entry.clone()); + } else { + eprintln!("exclude {}", &lexical_grammar.variables[symbol.index].name); + } + } + + for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() { + if external_token.corresponding_internal_token.is_none() { + state + .terminal_entries + .entry(Symbol::external(i)) + .or_insert_with(|| recover_entry.clone()); + } + } + + state.terminal_entries.insert(Symbol::end(), recover_entry); +} diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/shrink_parse_table.rs index 8e826f5c..026c3058 100644 --- a/src/build_tables/shrink_parse_table.rs +++ b/src/build_tables/shrink_parse_table.rs @@ -1,14 +1,17 @@ +use super::token_conflicts::TokenConflictMap; use crate::grammars::{SyntaxGrammar, VariableType}; -use crate::rules::AliasMap; -use crate::tables::{ParseAction, ParseTable}; +use crate::rules::{AliasMap, Symbol}; +use 
crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use std::collections::{HashMap, HashSet}; pub(crate) fn shrink_parse_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, simple_aliases: &AliasMap, + token_conflict_map: &TokenConflictMap, ) { remove_unit_reductions(parse_table, syntax_grammar, simple_aliases); + merge_compatible_states(parse_table, syntax_grammar, token_conflict_map); remove_unused_states(parse_table); } @@ -86,6 +89,157 @@ fn remove_unit_reductions( } } +fn merge_compatible_states( + parse_table: &mut ParseTable, + syntax_grammar: &SyntaxGrammar, + token_conflict_map: &TokenConflictMap, +) { + let mut state_ids_by_signature = HashMap::new(); + for (i, state) in parse_table.states.iter().enumerate() { + state_ids_by_signature + .entry(state.unfinished_item_signature) + .or_insert(Vec::new()) + .push(i); + } + + let mut deleted_states = HashSet::new(); + loop { + let mut state_replacements = HashMap::new(); + for (_, state_ids) in &state_ids_by_signature { + for i in state_ids { + for j in state_ids { + if j == i { + break; + } + if deleted_states.contains(j) || deleted_states.contains(i) { + continue; + } + if merge_parse_state(syntax_grammar, token_conflict_map, parse_table, *j, *i) { + deleted_states.insert(*i); + state_replacements.insert(*i, *j); + } + } + } + } + + if state_replacements.is_empty() { + break; + } + + for state in parse_table.states.iter_mut() { + state.update_referenced_states(|other_state_id, _| { + *state_replacements + .get(&other_state_id) + .unwrap_or(&other_state_id) + }); + } + } +} + +fn merge_parse_state( + syntax_grammar: &SyntaxGrammar, + token_conflict_map: &TokenConflictMap, + parse_table: &mut ParseTable, + left: usize, + right: usize, +) -> bool { + let left_state = &parse_table.states[left]; + let right_state = &parse_table.states[right]; + + if left_state.nonterminal_entries != right_state.nonterminal_entries { + return false; + } + + for (symbol, left_entry) in 
&left_state.terminal_entries { + if let Some(right_entry) = right_state.terminal_entries.get(symbol) { + if right_entry.actions != left_entry.actions { + return false; + } + } else if !can_add_entry_to_state( + syntax_grammar, + token_conflict_map, + right_state, + *symbol, + left_entry, + ) { + return false; + } + } + + eprintln!("maybe merge {} {}", left, right); + + let mut symbols_to_add = Vec::new(); + for (symbol, right_entry) in &right_state.terminal_entries { + if !left_state.terminal_entries.contains_key(&symbol) { + if !can_add_entry_to_state( + syntax_grammar, + token_conflict_map, + left_state, + *symbol, + right_entry, + ) { + return false; + } + symbols_to_add.push(*symbol); + } + } + + for symbol in symbols_to_add { + let entry = parse_table.states[right].terminal_entries[&symbol].clone(); + parse_table.states[left] + .terminal_entries + .insert(symbol, entry); + } + + true +} + +fn can_add_entry_to_state( + syntax_grammar: &SyntaxGrammar, + token_conflict_map: &TokenConflictMap, + state: &ParseState, + token: Symbol, + entry: &ParseTableEntry, +) -> bool { + // Do not add external tokens; they could conflict lexically with any of the state's + // existing lookahead tokens. + if token.is_external() { + return false; + } + + // Only merge parse states by allowing existing reductions to happen + // with additional lookahead tokens. Do not alter parse states in ways + // that allow entirely new types of actions to happen. + if state.terminal_entries.iter().all(|(_, e)| e != entry) { + return false; + } + match entry.actions.last() { + Some(ParseAction::Reduce { .. }) => {} + _ => return false, + } + + // Do not add tokens which are both internal and external. Their validity could + // influence the behavior of the external scanner. + if syntax_grammar + .external_tokens + .iter() + .any(|t| t.corresponding_internal_token == Some(token)) + { + return false; + } + + // Do not add a token if it conflicts with an existing token. 
+ if token.is_terminal() { + for existing_token in state.terminal_entries.keys() { + if token_conflict_map.does_conflict(token.index, existing_token.index) { + return false; + } + } + } + + true +} + fn remove_unused_states(parse_table: &mut ParseTable) { let mut state_usage_map = vec![false; parse_table.states.len()]; for state in &parse_table.states { diff --git a/src/build_tables/token_conflict_map.rs b/src/build_tables/token_conflicts.rs similarity index 92% rename from src/build_tables/token_conflict_map.rs rename to src/build_tables/token_conflicts.rs index 52c68cc7..09d5e97c 100644 --- a/src/build_tables/token_conflict_map.rs +++ b/src/build_tables/token_conflicts.rs @@ -8,6 +8,7 @@ use std::fmt; struct TokenConflictStatus { does_overlap: bool, does_match_valid_continuation: bool, + does_match_separators: bool, matches_same_string: bool, } @@ -46,8 +47,9 @@ impl TokenConflictMap { self.status_matrix[matrix_index(self.n, i, j)].matches_same_string } - pub fn does_match_valid_continuation(&self, i: usize, j: usize) -> bool { - self.status_matrix[matrix_index(self.n, i, j)].does_match_valid_continuation + pub fn does_conflict(&self, i: usize, j: usize) -> bool { + let entry = &self.status_matrix[matrix_index(self.n, i, j)]; + entry.does_match_valid_continuation || entry.does_match_separators } pub fn does_overlap(&self, i: usize, j: usize) -> bool { @@ -207,10 +209,15 @@ fn compute_conflict_status( if chars.does_intersect(&following_chars[j]) { result.0.does_match_valid_continuation = true; } + if cursor.in_separator() { + result.0.does_match_separators = true; + } } else { result.1.does_overlap = true; if chars.does_intersect(&following_chars[i]) { result.1.does_match_valid_continuation = true; + } else { + result.1.does_match_separators = true; } } } @@ -326,9 +333,9 @@ mod tests { let token_map = TokenConflictMap::new( &grammar, vec![ - LookaheadSet::with(&[Symbol::terminal(var("identifier"))]), - LookaheadSet::with(&[Symbol::terminal(var("in"))]), - 
LookaheadSet::with(&[Symbol::terminal(var("identifier"))]), + LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()), + LookaheadSet::with([Symbol::terminal(var("in"))].iter().cloned()), + LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()), ], ); @@ -338,12 +345,12 @@ mod tests { // Depending on what character follows, the string "in" may be treated as part of an // `identifier` token. - assert!(token_map.does_match_valid_continuation(var("identifier"), var("in"))); + assert!(token_map.does_conflict(var("identifier"), var("in"))); // Depending on what character follows, the string "instanceof" may be treated as part of // an `identifier` token. - assert!(token_map.does_match_valid_continuation(var("identifier"), var("instanceof"))); - assert!(token_map.does_match_valid_continuation(var("instanceof"), var("in"))); + assert!(token_map.does_conflict(var("identifier"), var("instanceof"))); + assert!(token_map.does_conflict(var("instanceof"), var("in"))); } fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize { diff --git a/src/nfa.rs b/src/nfa.rs index 738d1b40..ee39d178 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -86,15 +86,34 @@ impl CharacterSet { } pub fn add(self, other: &CharacterSet) -> Self { - if let CharacterSet::Include(other_chars) = other { - if let CharacterSet::Include(mut chars) = self { - chars.extend(other_chars); - chars.sort_unstable(); - chars.dedup(); - return CharacterSet::Include(chars); - } + match self { + CharacterSet::Include(mut chars) => match other { + CharacterSet::Include(other_chars) => { + chars.extend(other_chars); + chars.sort_unstable(); + chars.dedup(); + CharacterSet::Include(chars) + } + CharacterSet::Exclude(other_chars) => { + let excluded_chars = other_chars + .iter() + .cloned() + .filter(|c| !chars.contains(&c)) + .collect(); + CharacterSet::Exclude(excluded_chars) + } + }, + CharacterSet::Exclude(mut chars) => match other { + CharacterSet::Include(other_chars) => { + 
chars.retain(|c| !other_chars.contains(&c)); + CharacterSet::Exclude(chars) + } + CharacterSet::Exclude(other_chars) => { + chars.retain(|c| other_chars.contains(&c)); + CharacterSet::Exclude(chars) + }, + }, } - panic!("Called add with a negated character set"); } pub fn does_intersect(&self, other: &CharacterSet) -> bool { @@ -458,6 +477,9 @@ mod tests { (CharacterSet::empty().add_char('f'), 0, 4), ], vec![ + (CharacterSet::empty().add_char('d'), 0, vec![1, 2]), + (CharacterSet::empty().add_char('f'), 0, vec![1, 4]), + (CharacterSet::empty().add_char('i'), 0, vec![1, 3]), ( CharacterSet::empty() .add_range('a', 'c') @@ -467,9 +489,6 @@ mod tests { 0, vec![1], ), - (CharacterSet::empty().add_char('d'), 0, vec![1, 2]), - (CharacterSet::empty().add_char('f'), 0, vec![1, 4]), - (CharacterSet::empty().add_char('i'), 0, vec![1, 3]), ], ), ]; diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 2b7e7b4d..4ef17b27 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -164,12 +164,20 @@ impl NfaBuilder { Err(Error::regex("Unicode character classes are not supported")) } Class::Perl(class) => { - self.push_advance(self.expand_perl_character_class(&class.kind), next_state_id); + let mut chars = self.expand_perl_character_class(&class.kind); + if class.negated { + chars = chars.negate(); + } + self.push_advance(chars, next_state_id); Ok(true) } Class::Bracketed(class) => match &class.kind { ClassSet::Item(item) => { - self.push_advance(self.expand_character_class(&item)?, next_state_id); + let mut chars = self.expand_character_class(&item)?; + if class.negated { + chars = chars.negate(); + } + self.push_advance(chars, next_state_id); Ok(true) } ClassSet::BinaryOp(_) => Err(Error::regex( diff --git a/src/tables.rs b/src/tables.rs index 0815aac8..344c4816 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -37,6 +37,7 @@ pub(crate) struct ParseState { pub terminal_entries: HashMap, pub 
nonterminal_entries: HashMap, pub lex_state_id: usize, + pub unfinished_item_signature: u64, } #[derive(Debug, PartialEq, Eq)] From 9824ebbbc31f7cda43f8a5aa5b3847462ab4c6aa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Jan 2019 12:34:40 -0800 Subject: [PATCH 081/208] Implement lex table construction --- src/build_tables/build_lex_table.rs | 124 ++++++++++++++++ src/build_tables/build_parse_table.rs | 31 ++-- src/build_tables/item_set_builder.rs | 20 +-- src/build_tables/lex_table_builder.rs | 24 --- src/build_tables/mod.rs | 131 ++++++++++++++++- src/build_tables/shrink_parse_table.rs | 2 - src/build_tables/token_conflicts.rs | 80 +++++----- src/grammars.rs | 10 +- src/main.rs | 2 +- src/nfa.rs | 130 ++++++----------- src/prepare_grammar/expand_tokens.rs | 24 ++- src/prepare_grammar/extract_tokens.rs | 17 ++- src/render/mod.rs | 195 +++++++++++++++++++++++-- src/rules.rs | 3 + src/tables.rs | 15 +- 15 files changed, 581 insertions(+), 227 deletions(-) create mode 100644 src/build_tables/build_lex_table.rs delete mode 100644 src/build_tables/lex_table_builder.rs diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs new file mode 100644 index 00000000..aa929d97 --- /dev/null +++ b/src/build_tables/build_lex_table.rs @@ -0,0 +1,124 @@ +use super::item::LookaheadSet; +use super::token_conflicts::TokenConflictMap; +use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::nfa::NfaCursor; +use crate::rules::Symbol; +use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable}; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, VecDeque}; + +pub(crate) fn build_lex_table( + parse_table: &mut ParseTable, + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + keywords: &LookaheadSet, +) -> (LexTable, LexTable) { + let keyword_lex_table; + if syntax_grammar.word_token.is_some() { + let mut builder = LexTableBuilder::new(lexical_grammar); + 
builder.add_state_for_tokens(keywords.iter()); + keyword_lex_table = builder.table; + } else { + keyword_lex_table = LexTable::default(); + } + + let mut builder = LexTableBuilder::new(lexical_grammar); + for state in parse_table.states.iter_mut() { + let tokens = state.terminal_entries.keys().filter_map(|token| { + if token.is_terminal() { + if keywords.contains(&token) { + syntax_grammar.word_token + } else { + Some(*token) + } + } else { + None + } + }); + state.lex_state_id = builder.add_state_for_tokens(tokens); + } + + (builder.table, keyword_lex_table) +} + +struct LexTableBuilder<'a> { + lexical_grammar: &'a LexicalGrammar, + cursor: NfaCursor<'a>, + table: LexTable, + state_queue: VecDeque<(usize, Vec)>, + state_ids_by_nfa_state_set: HashMap, usize>, +} + +impl<'a> LexTableBuilder<'a> { + fn new(lexical_grammar: &'a LexicalGrammar) -> Self { + Self { + lexical_grammar, + cursor: NfaCursor::new(&lexical_grammar.nfa, vec![]), + table: LexTable::default(), + state_queue: VecDeque::new(), + state_ids_by_nfa_state_set: HashMap::new(), + } + } + + fn add_state_for_tokens(&mut self, tokens: impl Iterator) -> usize { + let nfa_states = tokens + .map(|token| self.lexical_grammar.variables[token.index].start_state) + .collect(); + let result = self.add_state(nfa_states); + while let Some((state_id, nfa_states)) = self.state_queue.pop_front() { + self.populate_state(state_id, nfa_states); + } + result + } + + fn add_state(&mut self, nfa_states: Vec) -> usize { + match self.state_ids_by_nfa_state_set.entry(nfa_states) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let state_id = self.table.states.len(); + self.table.states.push(LexState::default()); + self.state_queue.push_back((state_id, v.key().clone())); + v.insert(state_id); + state_id + } + } + } + + fn populate_state(&mut self, state_id: usize, nfa_states: Vec) { + self.cursor.reset(nfa_states); + + let mut completion = None; + for (id, prec) in self.cursor.completions() { + if let Some((prev_id, 
prev_precedence)) = completion { + if TokenConflictMap::prefer_token( + self.lexical_grammar, + (prev_precedence, prev_id), + (prec, id), + ) { + continue; + } + } + completion = Some((id, prec)); + } + + for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() { + if let Some((_, completed_precedence)) = completion { + if advance_precedence < completed_precedence { + continue; + } + } + let next_state_id = self.add_state(next_states); + self.table.states[state_id].advance_actions.push(( + chars, + AdvanceAction { + state: next_state_id, + in_main_token: !is_sep, + }, + )); + } + + if let Some((completion_index, _)) = completion { + self.table.states[state_id].accept_action = Some(completion_index); + } + } +} diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 2fe6fd8d..c17261dc 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -7,10 +7,10 @@ use crate::tables::{ AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; -use std::hash::Hasher; -use std::collections::hash_map::{Entry, DefaultHasher}; +use std::collections::hash_map::{DefaultHasher, Entry}; use std::collections::{HashMap, HashSet, VecDeque}; use std::fmt::Write; +use std::hash::Hasher; #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -31,7 +31,6 @@ struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - inlines: &'a InlinedProductionMap, state_ids_by_item_set: HashMap, ParseStateId>, item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, @@ -51,9 +50,12 @@ impl<'a> ParseTableBuilder<'a> { &Vec::new(), &Vec::new(), ParseItemSet::with( - [(ParseItem::start(), LookaheadSet::with([Symbol::end()].iter().cloned()))] - .iter() - .cloned(), + [( + ParseItem::start(), + LookaheadSet::with([Symbol::end()].iter().cloned()), + )] + .iter() + 
.cloned(), ), ); @@ -69,8 +71,12 @@ impl<'a> ParseTableBuilder<'a> { item_set: ParseItemSet<'a>, ) -> ParseStateId { if preceding_symbols.len() > 1 { - let left_tokens = self.item_set_builder.last_set(&preceding_symbols[preceding_symbols.len() - 2]); - let right_tokens = self.item_set_builder.first_set(&preceding_symbols[preceding_symbols.len() - 1]); + let left_tokens = self + .item_set_builder + .last_set(&preceding_symbols[preceding_symbols.len() - 2]); + let right_tokens = self + .item_set_builder + .first_set(&preceding_symbols[preceding_symbols.len() - 1]); for left_token in left_tokens.iter() { if left_token.is_terminal() { self.following_tokens[left_token.index].insert_all(right_tokens); @@ -117,11 +123,9 @@ impl<'a> ParseTableBuilder<'a> { ); } - let item_set = self.item_set_builder.transitive_closure( - &self.item_sets_by_state_id[entry.state_id], - self.syntax_grammar, - self.inlines, - ); + let item_set = self + .item_set_builder + .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); if debug { println!( @@ -606,7 +610,6 @@ pub(crate) fn build_parse_table( ParseTableBuilder { syntax_grammar, lexical_grammar, - inlines, item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 8649cb52..5e61bfcc 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -18,6 +18,7 @@ struct FollowSetInfo { pub(crate) struct ParseItemSetBuilder<'a> { first_sets: HashMap, last_sets: HashMap, + inlines: &'a InlinedProductionMap, transitive_closure_additions: Vec>>, } @@ -36,6 +37,7 @@ impl<'a> ParseItemSetBuilder<'a> { let mut result = Self { first_sets: HashMap::new(), last_sets: HashMap::new(), + inlines, transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()], }; @@ -237,15 +239,12 @@ impl<'a> 
ParseItemSetBuilder<'a> { result } - pub(crate) fn transitive_closure( - &mut self, - item_set: &ParseItemSet<'a>, - grammar: &'a SyntaxGrammar, - inlines: &'a InlinedProductionMap, - ) -> ParseItemSet<'a> { + pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { - if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) + if let Some(productions) = self + .inlines + .inlined_productions(item.production, item.step_index) { for production in productions { self.add_item( @@ -273,12 +272,7 @@ impl<'a> ParseItemSetBuilder<'a> { &self.first_sets[symbol] } - fn add_item( - &self, - set: &mut ParseItemSet<'a>, - item: ParseItem<'a>, - lookaheads: &LookaheadSet, - ) { + fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &LookaheadSet) { if let Some(step) = item.step() { if step.symbol.is_non_terminal() { let next_step = item.successor().step(); diff --git a/src/build_tables/lex_table_builder.rs b/src/build_tables/lex_table_builder.rs deleted file mode 100644 index 86d1578b..00000000 --- a/src/build_tables/lex_table_builder.rs +++ /dev/null @@ -1,24 +0,0 @@ -use crate::rules::Symbol; -use crate::tables::LexTable; -use crate::grammars::{SyntaxGrammar, LexicalGrammar}; - -pub(crate) struct LexTableBuilder<'a> { - syntax_grammar: &'a SyntaxGrammar, - lexical_grammar: &'a LexicalGrammar, - table: LexTable, -} - -impl<'a> LexTableBuilder<'a> { - pub fn new( - syntax_grammar: &'a SyntaxGrammar, - lexical_grammar: &'a LexicalGrammar, - ) -> Self { - Self { - syntax_grammar, lexical_grammar, table: LexTable::default() - } - } - - pub fn build(self) -> (LexTable, LexTable, Option) { - (LexTable::default(), LexTable::default(), None) - } -} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 665c56a0..8b3a2db4 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ 
-1,11 +1,12 @@ +mod build_lex_table; mod build_parse_table; mod coincident_tokens; mod item; mod item_set_builder; -mod lex_table_builder; mod shrink_parse_table; mod token_conflicts; +use self::build_lex_table::build_lex_table; use self::build_parse_table::build_parse_table; use self::coincident_tokens::CoincidentTokenIndex; use self::item::LookaheadSet; @@ -13,6 +14,7 @@ use self::shrink_parse_table::shrink_parse_table; use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; +use crate::nfa::{CharacterSet, NfaCursor}; use crate::rules::{AliasMap, Symbol}; use crate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; @@ -25,7 +27,22 @@ pub(crate) fn build_tables( let (mut parse_table, following_tokens) = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); + + eprintln!("{:?}", token_conflict_map); + let coincident_token_index = CoincidentTokenIndex::new(&parse_table); + let keywords = if let Some(word_token) = syntax_grammar.word_token { + identify_keywords( + lexical_grammar, + &parse_table, + word_token, + &token_conflict_map, + &coincident_token_index, + ) + } else { + LookaheadSet::new() + }; + populate_error_state( &mut parse_table, syntax_grammar, @@ -39,7 +56,14 @@ pub(crate) fn build_tables( simple_aliases, &token_conflict_map, ); - Ok((parse_table, LexTable::default(), LexTable::default(), None)) + let (main_lex_table, keyword_lex_table) = + build_lex_table(&mut parse_table, syntax_grammar, lexical_grammar, &keywords); + Ok(( + parse_table, + main_lex_table, + keyword_lex_table, + syntax_grammar.word_token, + )) } fn populate_error_state( @@ -77,13 +101,10 @@ fn populate_error_state( || !token_conflict_map.does_conflict(i, t.index) }); if can_be_used_for_recovery { - eprintln!("include {}", &lexical_grammar.variables[symbol.index].name); state .terminal_entries 
.entry(symbol) .or_insert_with(|| recover_entry.clone()); - } else { - eprintln!("exclude {}", &lexical_grammar.variables[symbol.index].name); } } @@ -98,3 +119,103 @@ fn populate_error_state( state.terminal_entries.insert(Symbol::end(), recover_entry); } + +fn identify_keywords( + lexical_grammar: &LexicalGrammar, + parse_table: &ParseTable, + word_token: Symbol, + token_conflict_map: &TokenConflictMap, + coincident_token_index: &CoincidentTokenIndex, +) -> LookaheadSet { + let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new()); + + // First find all of the candidate keyword tokens: tokens that start with + // letters or underscore and can match the same string as a word token. + let keywords = LookaheadSet::with(lexical_grammar.variables.iter().enumerate().filter_map( + |(i, variable)| { + cursor.reset(vec![variable.start_state]); + if all_chars_are_alphabetical(&cursor) + && token_conflict_map.does_match_same_string(i, word_token.index) + { + Some(Symbol::terminal(i)) + } else { + None + } + }, + )); + + // Exclude keyword candidates that shadow another keyword candidate. + let keywords = LookaheadSet::with(keywords.iter().filter(|token| { + for other_token in keywords.iter() { + if other_token != *token + && token_conflict_map.does_match_same_string(token.index, other_token.index) + { + eprintln!( + "Exclude {} from keywords because it matches the same string as {}", + lexical_grammar.variables[token.index].name, + lexical_grammar.variables[other_token.index].name + ); + return false; + } + } + true + })); + + // Exclude keyword candidates for which substituting the keyword capture + // token would introduce new lexical conflicts with other tokens. 
+ let keywords = LookaheadSet::with(keywords.iter().filter(|token| { + for other_index in 0..lexical_grammar.variables.len() { + if keywords.contains(&Symbol::terminal(other_index)) { + continue; + } + + // If the word token was already valid in every state containing + // this keyword candidate, then substituting the word token won't + // introduce any new lexical conflicts. + if coincident_token_index + .states_with(*token, Symbol::terminal(other_index)) + .iter() + .all(|state_id| { + parse_table.states[*state_id] + .terminal_entries + .contains_key(&word_token) + }) + { + continue; + } + + if !token_conflict_map.has_same_conflict_status( + token.index, + word_token.index, + other_index, + ) { + eprintln!( + "Exclude {} from keywords because of conflict with {}", + lexical_grammar.variables[token.index].name, + lexical_grammar.variables[other_index].name + ); + return false; + } + } + + eprintln!( + "Include {} in keywords", + lexical_grammar.variables[token.index].name, + ); + true + })); + + keywords +} + +fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool { + cursor.successors().all(|(chars, _, _, is_sep)| { + if is_sep { + true + } else if let CharacterSet::Include(chars) = chars { + chars.iter().all(|c| c.is_alphabetic() || *c == '_') + } else { + false + } + }) +} diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/shrink_parse_table.rs index 026c3058..b943158f 100644 --- a/src/build_tables/shrink_parse_table.rs +++ b/src/build_tables/shrink_parse_table.rs @@ -166,8 +166,6 @@ fn merge_parse_state( } } - eprintln!("maybe merge {} {}", left, right); - let mut symbols_to_add = Vec::new(); for (symbol, right_entry) in &right_state.terminal_entries { if !left_state.terminal_entries.contains_key(&symbol) { diff --git a/src/build_tables/token_conflicts.rs b/src/build_tables/token_conflicts.rs index 09d5e97c..9f1c4426 100644 --- a/src/build_tables/token_conflicts.rs +++ b/src/build_tables/token_conflicts.rs @@ -4,7 +4,7 @@ use 
crate::nfa::{CharacterSet, NfaCursor}; use std::collections::HashSet; use std::fmt; -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] struct TokenConflictStatus { does_overlap: bool, does_match_valid_continuation: bool, @@ -12,15 +12,16 @@ struct TokenConflictStatus { matches_same_string: bool, } -pub(crate) struct TokenConflictMap { +pub(crate) struct TokenConflictMap<'a> { n: usize, status_matrix: Vec, starting_chars_by_index: Vec, following_chars_by_index: Vec, + grammar: &'a LexicalGrammar, } -impl TokenConflictMap { - pub fn new(grammar: &LexicalGrammar, following_tokens: Vec) -> Self { +impl<'a> TokenConflictMap<'a> { + pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec) -> Self { let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); let starting_chars = get_starting_chars(&mut cursor, grammar); let following_chars = get_following_chars(&starting_chars, following_tokens); @@ -40,9 +41,16 @@ impl TokenConflictMap { status_matrix, starting_chars_by_index: starting_chars, following_chars_by_index: following_chars, + grammar, } } + pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool { + let left = &self.status_matrix[matrix_index(self.n, a, other)]; + let right = &self.status_matrix[matrix_index(self.n, b, other)]; + left == right + } + pub fn does_match_same_string(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].matches_same_string } @@ -55,9 +63,28 @@ impl TokenConflictMap { pub fn does_overlap(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].does_overlap } + + pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool { + if left.0 > right.0 { + return true; + } else if left.0 < right.0 { + return false; + } + + match ( + grammar.variables[left.1].is_string, + grammar.variables[right.1].is_string, + ) { + (true, false) => return true, + (false, true) => return false, + _ => {} + 
} + + left.0 < right.0 + } } -impl fmt::Debug for TokenConflictMap { +impl<'a> fmt::Debug for TokenConflictMap<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "TokenConflictMap {{\n")?; @@ -69,18 +96,22 @@ impl fmt::Debug for TokenConflictMap { write!(f, " following_characters: {{\n")?; for i in 0..self.n { - write!(f, " {}: {:?},\n", i, self.following_chars_by_index[i])?; + write!( + f, + " {}: {:?},\n", + self.grammar.variables[i].name, self.following_chars_by_index[i] + )?; } write!(f, " }},\n")?; write!(f, " status_matrix: {{\n")?; for i in 0..self.n { - write!(f, " {}: {{\n", i)?; + write!(f, " {}: {{\n", self.grammar.variables[i].name)?; for j in 0..self.n { write!( f, " {}: {:?},\n", - j, + self.grammar.variables[j].name, self.status_matrix[matrix_index(self.n, i, j)] )?; } @@ -101,7 +132,7 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec bool { - if left.0 > right.0 { - return true; - } else if left.0 < right.0 { - return false; - } - - match ( - grammar.variables[left.1].is_string, - grammar.variables[right.1].is_string, - ) { - (true, false) => return true, - (false, true) => return false, - _ => {} - } - - left.0 < right.0 -} - fn variable_ids_for_states<'a>( state_ids: &'a Vec, grammar: &'a LexicalGrammar, diff --git a/src/grammars.rs b/src/grammars.rs index 18da86d8..d23e8ca6 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -91,6 +91,7 @@ pub(crate) struct SyntaxGrammar { pub word_token: Option, } +#[cfg(test)] impl ProductionStep { pub(crate) fn new(symbol: Symbol) -> Self { Self { @@ -127,14 +128,6 @@ impl Production { pub fn first_symbol(&self) -> Option { self.steps.first().map(|s| s.symbol.clone()) } - - pub fn last_precedence(&self) -> i32 { - self.steps.last().map(|s| s.precedence).unwrap_or(0) - } - - pub fn last_associativity(&self) -> Option { - self.steps.last().map(|s| s.associativity).unwrap_or(None) - } } impl Default for Production { @@ -146,6 +139,7 @@ impl Default for 
Production { } } +#[cfg(test)] impl Variable { pub fn named(name: &str, rule: Rule) -> Self { Self { diff --git a/src/main.rs b/src/main.rs index c7ca2ca5..cd672186 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,7 +42,7 @@ fn main() -> error::Result<()> { ) .get_matches(); - if let Some(matches) = matches.subcommand_matches("generate") { + if let Some(_) = matches.subcommand_matches("generate") { let mut grammar_path = env::current_dir().expect("Failed to read CWD"); grammar_path.push("grammar.js"); let grammar_json = load_js_grammar_file(grammar_path); diff --git a/src/nfa.rs b/src/nfa.rs index ee39d178..e14dac44 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -40,7 +40,6 @@ impl Default for Nfa { pub struct NfaCursor<'a> { pub(crate) state_ids: Vec, nfa: &'a Nfa, - in_sep: bool, } impl CharacterSet { @@ -111,7 +110,7 @@ impl CharacterSet { CharacterSet::Exclude(other_chars) => { chars.retain(|c| other_chars.contains(&c)); CharacterSet::Exclude(chars) - }, + } }, } } @@ -311,7 +310,6 @@ impl<'a> NfaCursor<'a> { let mut result = Self { nfa, state_ids: Vec::new(), - in_sep: true, }; result.add_states(&mut states); result @@ -322,81 +320,59 @@ impl<'a> NfaCursor<'a> { self.add_states(&mut states); } - pub fn advance(&mut self, c: char) -> bool { - let mut result = false; - let mut new_state_ids = Vec::new(); - let mut any_sep_transitions = false; - for current_state_id in &self.state_ids { - if let NfaState::Advance { - chars, - state_id, - is_sep, - .. - } = &self.nfa.states[*current_state_id as usize] - { - if chars.contains(c) { - if *is_sep { - any_sep_transitions = true; - } - new_state_ids.push(*state_id); - result = true; - } - } - } - if !any_sep_transitions { - self.in_sep = false; - } - self.state_ids.clear(); - self.add_states(&mut new_state_ids); - result - } - - pub fn successors(&self) -> impl Iterator { + pub fn successors(&self) -> impl Iterator { self.state_ids.iter().filter_map(move |id| { if let NfaState::Advance { chars, state_id, precedence, - .. 
+ is_sep, } = &self.nfa.states[*id as usize] { - Some((chars, *precedence, *state_id)) + Some((chars, *precedence, *state_id, *is_sep)) } else { None } }) } - pub fn grouped_successors(&self) -> Vec<(CharacterSet, i32, Vec)> { + pub fn grouped_successors(&self) -> Vec<(CharacterSet, i32, Vec, bool)> { Self::group_successors(self.successors()) } fn group_successors<'b>( - iter: impl Iterator, - ) -> Vec<(CharacterSet, i32, Vec)> { - let mut result: Vec<(CharacterSet, i32, Vec)> = Vec::new(); - for (chars, prec, state) in iter { + iter: impl Iterator, + ) -> Vec<(CharacterSet, i32, Vec, bool)> { + let mut result: Vec<(CharacterSet, i32, Vec, bool)> = Vec::new(); + for (chars, prec, state, is_sep) in iter { let mut chars = chars.clone(); let mut i = 0; while i < result.len() { - let intersection = result[i].0.remove_intersection(&mut chars); - if !intersection.is_empty() { - if result[i].0.is_empty() { - result[i].0 = intersection; - result[i].1 = max(result[i].1, prec); - result[i].2.push(state); - } else { + if result[i].0 == chars { + result[i].1 = max(result[i].1, prec); + result[i].2.push(state); + result[i].3 |= is_sep; + } else { + let intersection = result[i].0.remove_intersection(&mut chars); + if !intersection.is_empty() { let mut states = result[i].2.clone(); - let mut precedence = result[i].1; states.push(state); - result.insert(i, (intersection, max(precedence, prec), states)); + result.insert( + i, + ( + intersection, + max(result[i].1, prec), + states, + result[i].3 || is_sep, + ), + ); i += 1; } } i += 1; } if !chars.is_empty() { - result.push((chars, prec, vec![state])); + result.push((chars, prec, vec![state], is_sep)); } } result.sort_unstable_by(|a, b| a.0.cmp(&b.0)); @@ -417,10 +393,6 @@ impl<'a> NfaCursor<'a> { }) } - pub fn in_separator(&self) -> bool { - self.in_sep - } - pub fn add_states(&mut self, new_state_ids: &mut Vec) { let mut i = 0; while i < new_state_ids.len() { @@ -460,26 +432,31 @@ mod tests { let table = [ ( vec![ - 
(CharacterSet::empty().add_range('a', 'f'), 0, 1), - (CharacterSet::empty().add_range('d', 'i'), 1, 2), + (CharacterSet::empty().add_range('a', 'f'), 0, 1, false), + (CharacterSet::empty().add_range('d', 'i'), 1, 2, false), ], vec![ - (CharacterSet::empty().add_range('a', 'c'), 0, vec![1]), - (CharacterSet::empty().add_range('d', 'f'), 1, vec![1, 2]), - (CharacterSet::empty().add_range('g', 'i'), 1, vec![2]), + (CharacterSet::empty().add_range('a', 'c'), 0, vec![1], false), + ( + CharacterSet::empty().add_range('d', 'f'), + 1, + vec![1, 2], + false, + ), + (CharacterSet::empty().add_range('g', 'i'), 1, vec![2], false), ], ), ( vec![ - (CharacterSet::empty().add_range('a', 'z'), 0, 1), - (CharacterSet::empty().add_char('d'), 0, 2), - (CharacterSet::empty().add_char('i'), 0, 3), - (CharacterSet::empty().add_char('f'), 0, 4), + (CharacterSet::empty().add_range('a', 'z'), 0, 1, false), + (CharacterSet::empty().add_char('d'), 0, 2, false), + (CharacterSet::empty().add_char('i'), 0, 3, false), + (CharacterSet::empty().add_char('f'), 0, 4, false), ], vec![ - (CharacterSet::empty().add_char('d'), 0, vec![1, 2]), - (CharacterSet::empty().add_char('f'), 0, vec![1, 4]), - (CharacterSet::empty().add_char('i'), 0, vec![1, 3]), + (CharacterSet::empty().add_char('d'), 0, vec![1, 2], false), + (CharacterSet::empty().add_char('f'), 0, vec![1, 4], false), + (CharacterSet::empty().add_char('i'), 0, vec![1, 3], false), ( CharacterSet::empty() .add_range('a', 'c') @@ -488,6 +465,7 @@ mod tests { .add_range('j', 'z'), 0, vec![1], + false, ), ], ), @@ -495,28 +473,10 @@ mod tests { for row in table.iter() { assert_eq!( - NfaCursor::group_successors(row.0.iter().map(|(c, p, s)| (c, *p, *s))), + NfaCursor::group_successors(row.0.iter().map(|(c, p, s, sep)| (c, *p, *s, *sep))), row.1 ); } - - // let successors = NfaCursor::group_successors( - // [ - // (&CharacterSet::empty().add_range('a', 'f'), 1), - // (&CharacterSet::empty().add_range('d', 'i'), 2), - // ] - // .iter() - // .cloned(), - 
// ); - // - // assert_eq!( - // successors, - // vec![ - // (CharacterSet::empty().add_range('a', 'c'), vec![1],), - // (CharacterSet::empty().add_range('d', 'f'), vec![1, 2],), - // (CharacterSet::empty().add_range('g', 'i'), vec![2],), - // ] - // ); } #[test] diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 4ef17b27..fdf085f6 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -6,6 +6,7 @@ use crate::rules::Rule; use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, }; +use std::i32; struct NfaBuilder { nfa: Nfa, @@ -17,7 +18,7 @@ fn is_string(rule: &Rule) -> bool { match rule { Rule::String(_) => true, Rule::Metadata { rule, .. } => is_string(rule), - _ => false + _ => false, } } @@ -346,7 +347,9 @@ impl NfaBuilder { fn push_split(&mut self, state_id: u32) { let last_state_id = self.nfa.last_state_id(); - self.nfa.states.push(NfaState::Split(state_id, last_state_id)); + self.nfa + .states + .push(NfaState::Split(state_id, last_state_id)); } fn add_precedence(&mut self, prec: i32, mut state_ids: Vec) { @@ -354,12 +357,12 @@ impl NfaBuilder { while i < state_ids.len() { let state_id = state_ids[i]; let (left, right) = match &mut self.nfa.states[state_id as usize] { - NfaState::Accept {precedence, ..} => { + NfaState::Accept { precedence, .. 
} => { *precedence = prec; return; - }, + } NfaState::Split(left, right) => (*left, *right), - _ => return + _ => return, }; if !state_ids.contains(&left) { state_ids.push(left); @@ -383,7 +386,7 @@ mod tests { let mut cursor = NfaCursor::new(&grammar.nfa, start_states); let mut result = None; - let mut result_precedence = 0; + let mut result_precedence = i32::MIN; let mut start_char = 0; let mut end_char = 0; for c in s.chars() { @@ -393,9 +396,14 @@ mod tests { result_precedence = precedence; } } - if cursor.advance(c) { + if let Some((_, _, next_states, in_sep)) = cursor + .grouped_successors() + .into_iter() + .find(|(chars, prec, _, _)| chars.contains(c) && *prec >= result_precedence) + { + cursor.reset(next_states); end_char += 1; - if cursor.in_separator() { + if in_sep { start_char = end_char; } } else { diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs index eaeede90..5f3f6e16 100644 --- a/src/prepare_grammar/extract_tokens.rs +++ b/src/prepare_grammar/extract_tokens.rs @@ -1,6 +1,6 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar}; use crate::error::{Error, Result}; -use crate::grammars::{ExternalToken, Variable}; +use crate::grammars::{ExternalToken, Variable, VariableType}; use crate::rules::{MetadataParams, Rule, Symbol, SymbolType}; use std::collections::HashMap; use std::mem; @@ -240,16 +240,21 @@ impl TokenExtractor { let index = self.extracted_variables.len(); let variable = if let Some(string_value) = string_value { - Variable::anonymous(string_value, rule.clone()) + Variable { + name: string_value.clone(), + kind: VariableType::Anonymous, + rule: rule.clone() + } } else { self.current_variable_token_count += 1; - Variable::auxiliary( - &format!( + Variable { + name: format!( "{}_token{}", &self.current_variable_name, self.current_variable_token_count ), - rule.clone(), - ) + kind: VariableType::Auxiliary, + rule: rule.clone(), + } }; self.extracted_variables.push(variable); 
diff --git a/src/render/mod.rs b/src/render/mod.rs index fc4cdafb..cbb8ba0d 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -2,6 +2,7 @@ use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType use crate::nfa::CharacterSet; use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; +use core::ops::Range; use std::collections::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; @@ -12,11 +13,17 @@ macro_rules! add { }} } -macro_rules! add_line { - ($this: tt, $($arg: tt)*) => { +macro_rules! add_whitespace { + ($this: tt) => {{ for _ in 0..$this.indent_level { write!(&mut $this.buffer, " ").unwrap(); } + }}; +} + +macro_rules! add_line { + ($this: tt, $($arg: tt)*) => { + add_whitespace!($this); $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); $this.buffer += "\n"; } @@ -162,7 +169,7 @@ impl Generator { } } - add_line!(self, "#define LANGUAGE_VERSION {}", 6); + add_line!(self, "#define LANGUAGE_VERSION {}", 9); add_line!( self, "#define STATE_COUNT {}", @@ -352,7 +359,7 @@ impl Generator { add_line!( self, "ACCEPT_TOKEN({})", - self.symbol_ids[&accept_action.symbol] + self.symbol_ids[&Symbol::terminal(accept_action)] ); } @@ -360,9 +367,10 @@ impl Generator { for (characters, action) in state.advance_actions { let previous_length = self.buffer.len(); + add_whitespace!(self); add!(self, "if ("); if self.add_character_set_condition(&characters, &ruled_out_characters) { - add!(self, ")"); + add!(self, ")\n"); indent!(self); if action.in_main_token { add_line!(self, "ADVANCE({});", action.state); @@ -370,7 +378,7 @@ impl Generator { add_line!(self, "SKIP({});", action.state); } if let CharacterSet::Include(chars) = characters { - ruled_out_characters.extend(chars.iter()); + ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); } dedent!(self); } else { @@ -384,9 +392,106 @@ impl Generator { fn add_character_set_condition( &mut self, 
characters: &CharacterSet, - ruled_out_characters: &HashSet, + ruled_out_characters: &HashSet, ) -> bool { - true + match characters { + CharacterSet::Include(chars) => { + let ranges = Self::get_ranges(chars, ruled_out_characters); + self.add_character_range_conditions(ranges, false) + } + CharacterSet::Exclude(chars) => { + let ranges = Self::get_ranges(chars, ruled_out_characters); + self.add_character_range_conditions(ranges, true) + } + } + } + + fn add_character_range_conditions( + &mut self, + ranges: impl Iterator>, + is_negated: bool, + ) -> bool { + let line_break = "\n "; + let mut did_add = false; + for range in ranges { + if is_negated { + if did_add { + add!(self, " &&{}", line_break); + } + if range.end == range.start { + add!(self, "lookahead != "); + self.add_character(range.start); + } else if range.end as u32 == range.start as u32 + 1 { + add!(self, "lookahead != "); + self.add_character(range.start); + add!(self, " &&{}lookahead != ", line_break); + self.add_character(range.end); + } else { + add!(self, "(lookahead < "); + self.add_character(range.start); + add!(self, " || "); + self.add_character(range.end); + add!(self, " < lookahead)"); + } + } else { + if did_add { + add!(self, " ||{}", line_break); + } + if range.end == range.start { + add!(self, "lookahead == "); + self.add_character(range.start); + } else if range.end as u32 == range.start as u32 + 1 { + add!(self, "lookahead == "); + self.add_character(range.start); + add!(self, " ||{}lookahead == ", line_break); + self.add_character(range.end); + } else { + add!(self, "("); + self.add_character(range.start); + add!(self, " <= lookahead && lookahead <= "); + self.add_character(range.end); + add!(self, ")"); + } + } + did_add = true; + } + did_add + } + + fn get_ranges<'a>( + chars: &'a Vec, + ruled_out_characters: &'a HashSet, + ) -> impl Iterator> + 'a { + let mut prev_range: Option> = None; + chars + .iter() + .cloned() + .chain(Some('\0')) + .filter_map(move |c| { + if 
ruled_out_characters.contains(&(c as u32)) { + return None; + } + if let Some(range) = prev_range.clone() { + if c == '\0' { + prev_range = Some(c..c); + return Some(range); + } + + let mut prev_range_successor = range.end as u32 + 1; + while prev_range_successor < c as u32 { + if !ruled_out_characters.contains(&prev_range_successor) { + prev_range = Some(c..c); + return Some(range); + } + prev_range_successor += 1; + } + prev_range = Some(range.start..c); + None + } else { + prev_range = Some(c..c); + None + } + }) } fn add_lex_modes_list(&mut self) { @@ -577,13 +682,6 @@ impl Generator { alias_sequence_id, .. } => { - if !self.symbol_ids.contains_key(&symbol) { - eprintln!( - "SYMBOL: {:?} {:?}", - symbol, - self.metadata_for_symbol(symbol) - ); - } add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); @@ -785,7 +883,7 @@ impl Generator { { result.push(c); } else { - result += match c { + let replacement = match c { '~' => "TILDE", '`' => "BQUOTE", '!' 
=> "BANG", @@ -821,7 +919,11 @@ impl Generator { '\r' => "CR", '\t' => "TAB", _ => continue, + }; + if !result.is_empty() && !result.ends_with("_") { + result.push('_'); } + result += replacement; } } result @@ -837,6 +939,21 @@ impl Generator { } result } + + fn add_character(&mut self, c: char) { + if c.is_ascii() { + match c { + '\'' => add!(self, "'\\''"), + '\\' => add!(self, "'\\\\'"), + '\t' => add!(self, "'\\t'"), + '\n' => add!(self, "'\\n'"), + '\r' => add!(self, "'\\r'"), + _ => add!(self, "'{}'", c), + } + } else { + add!(self, "{}", c as u32) + } + } } pub(crate) fn render_c_code( @@ -867,3 +984,49 @@ pub(crate) fn render_c_code( } .generate() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_char_ranges() { + struct Row { + chars: Vec, + ruled_out_chars: Vec, + expected_ranges: Vec>, + } + + let table = [ + Row { + chars: vec!['a'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'a'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'z'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], + ruled_out_chars: vec!['d', 'f', 'g'], + expected_ranges: vec!['a'..'h', 'z'..'z'], + }, + ]; + + for Row { + chars, + ruled_out_chars, + expected_ranges, + } in table.iter() + { + let ruled_out_chars = ruled_out_chars + .into_iter() + .map(|c: &char| *c as u32) + .collect(); + let ranges = Generator::get_ranges(chars, &ruled_out_chars).collect::>(); + assert_eq!(ranges, *expected_ranges); + } + } +} diff --git a/src/rules.rs b/src/rules.rs index 3bfd5181..77e50d3c 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -120,7 +120,10 @@ impl Rule { pub fn seq(rules: Vec) -> Self { Rule::Seq(rules) } +} +#[cfg(test)] +impl Rule { pub fn terminal(index: usize) -> Self { Rule::Symbol(Symbol::terminal(index)) } diff --git a/src/tables.rs b/src/tables.rs index 344c4816..1c125621 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1,7 +1,6 @@ use 
crate::nfa::CharacterSet; use crate::rules::{Alias, Associativity, Symbol}; use std::collections::HashMap; -use std::ops::Range; pub(crate) type AliasSequenceId = usize; pub(crate) type ParseStateId = usize; @@ -50,21 +49,13 @@ pub(crate) struct ParseTable { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct AdvanceAction { pub state: LexStateId, - pub precedence: Range, pub in_main_token: bool, } -#[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct AcceptTokenAction { - pub symbol: Symbol, - pub precedence: i32, - pub implicit_precedence: i32, -} - -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct LexState { - pub advance_actions: HashMap, - pub accept_action: Option, + pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, + pub accept_action: Option, } #[derive(Debug, PartialEq, Eq)] From 3fbaff5e69a1bfd200a7c9979e52412b55a26ba0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Jan 2019 16:48:44 -0800 Subject: [PATCH 082/208] Fix various logic errors in parse table construction --- Cargo.lock | 18 ++++ Cargo.toml | 5 ++ src/build_tables/build_lex_table.rs | 116 +++++++++++++++++++++---- src/build_tables/build_parse_table.rs | 59 +++++++------ src/build_tables/coincident_tokens.rs | 38 ++++---- src/build_tables/item.rs | 4 +- src/build_tables/item_set_builder.rs | 2 +- src/build_tables/mod.rs | 44 +++++----- src/build_tables/shrink_parse_table.rs | 6 +- src/build_tables/token_conflicts.rs | 2 +- src/grammars.rs | 2 +- src/logger.rs | 29 +++++++ src/main.rs | 28 ++++-- src/nfa.rs | 26 ++++-- src/parse_grammar.rs | 4 +- src/prepare_grammar/expand_repeats.rs | 2 +- src/prepare_grammar/extract_tokens.rs | 2 +- src/prepare_grammar/process_inlines.rs | 2 +- src/render/mod.rs | 19 ++-- src/rules.rs | 2 +- src/tables.rs | 2 +- 21 files changed, 297 insertions(+), 115 deletions(-) create mode 100644 src/logger.rs diff --git a/Cargo.lock b/Cargo.lock index 538517f1..2312d362 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -76,6 +76,11 @@ dependencies = [ "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "byteorder" +version = "1.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cc" version = "1.0.25" @@ -212,6 +217,15 @@ dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "hashbrown" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ignore" version = "0.4.4" @@ -463,9 +477,11 @@ version = "0.1.0" dependencies = [ "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", @@ -737,6 +753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" "checksum blake2-rfc 0.2.18 
(registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" +"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" @@ -753,6 +770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" "checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865" +"checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e" "checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01" "checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" diff --git a/Cargo.toml b/Cargo.toml index b29bc85e..29b10e17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ 
lazy_static = "1.2.0" smallbitvec = "2.3.0" clap = "2.32" dirs = "1.0.2" +hashbrown = "0.1" ignore = "0.4.4" libloading = "0.5" rusqlite = "0.14.0" @@ -20,3 +21,7 @@ regex-syntax = "0.6.4" [dependencies.serde_json] version = "1.0" features = ["preserve_order"] + +[dependencies.log] +version = "0.4.6" +features = ["std"] diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index aa929d97..c002f427 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -2,10 +2,9 @@ use super::item::LookaheadSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::nfa::NfaCursor; -use crate::rules::Symbol; use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable}; use std::collections::hash_map::Entry; -use std::collections::{HashMap, VecDeque}; +use std::collections::{BTreeMap, HashMap, VecDeque}; pub(crate) fn build_lex_table( parse_table: &mut ParseTable, @@ -16,15 +15,16 @@ pub(crate) fn build_lex_table( let keyword_lex_table; if syntax_grammar.word_token.is_some() { let mut builder = LexTableBuilder::new(lexical_grammar); - builder.add_state_for_tokens(keywords.iter()); + builder.add_state_for_tokens(keywords); keyword_lex_table = builder.table; } else { keyword_lex_table = LexTable::default(); } let mut builder = LexTableBuilder::new(lexical_grammar); - for state in parse_table.states.iter_mut() { - let tokens = state.terminal_entries.keys().filter_map(|token| { + for (i, state) in parse_table.states.iter_mut().enumerate() { + info!("populate lex state for parse state {}", i); + let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| { if token.is_terminal() { if keywords.contains(&token) { syntax_grammar.word_token @@ -34,11 +34,14 @@ pub(crate) fn build_lex_table( } else { None } - }); - state.lex_state_id = builder.add_state_for_tokens(tokens); + })); + state.lex_state_id = builder.add_state_for_tokens(&tokens); 
} - (builder.table, keyword_lex_table) + let mut table = builder.table; + shrink_lex_table(&mut table, parse_table); + + (table, keyword_lex_table) } struct LexTableBuilder<'a> { @@ -60,32 +63,49 @@ impl<'a> LexTableBuilder<'a> { } } - fn add_state_for_tokens(&mut self, tokens: impl Iterator) -> usize { + fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize { let nfa_states = tokens + .iter() .map(|token| self.lexical_grammar.variables[token.index].start_state) .collect(); - let result = self.add_state(nfa_states); - while let Some((state_id, nfa_states)) = self.state_queue.pop_front() { + let (state_id, is_new) = self.add_state(nfa_states); + + if is_new { + info!( + "entry point state: {}, tokens: {:?}", + state_id, + tokens + .iter() + .map(|t| &self.lexical_grammar.variables[t.index].name) + .collect::>() + ); + } + + while let Some((state_id, nfa_states)) = self.state_queue.pop_back() { self.populate_state(state_id, nfa_states); } - result + state_id } - fn add_state(&mut self, nfa_states: Vec) -> usize { - match self.state_ids_by_nfa_state_set.entry(nfa_states) { - Entry::Occupied(o) => *o.get(), + fn add_state(&mut self, nfa_states: Vec) -> (usize, bool) { + self.cursor.reset(nfa_states); + match self + .state_ids_by_nfa_state_set + .entry(self.cursor.state_ids.clone()) + { + Entry::Occupied(o) => (*o.get(), false), Entry::Vacant(v) => { let state_id = self.table.states.len(); self.table.states.push(LexState::default()); self.state_queue.push_back((state_id, v.key().clone())); v.insert(state_id); - state_id + (state_id, true) } } } fn populate_state(&mut self, state_id: usize, nfa_states: Vec) { - self.cursor.reset(nfa_states); + self.cursor.force_reset(nfa_states); let mut completion = None; for (id, prec) in self.cursor.completions() { @@ -102,12 +122,16 @@ impl<'a> LexTableBuilder<'a> { } for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() { + info!( + "populate state: {}, characters: {:?}, precedence: 
{:?}", + state_id, chars, advance_precedence + ); if let Some((_, completed_precedence)) = completion { if advance_precedence < completed_precedence { continue; } } - let next_state_id = self.add_state(next_states); + let (next_state_id, _) = self.add_state(next_states); self.table.states[state_id].advance_actions.push(( chars, AdvanceAction { @@ -122,3 +146,59 @@ impl<'a> LexTableBuilder<'a> { } } } + +fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { + let mut state_replacements = BTreeMap::new(); + let mut done = false; + while !done { + done = true; + for (i, state_i) in table.states.iter().enumerate() { + if state_replacements.contains_key(&i) { + continue; + } + for (j, state_j) in table.states.iter().enumerate() { + if state_replacements.contains_key(&j) { + continue; + } + if j == i { + break; + } + if state_i == state_j { + info!("replace state {} with state {}", i, j); + state_replacements.insert(i, j); + done = false; + } + } + } + for state in table.states.iter_mut() { + for advance_action in state.advance_actions.iter_mut() { + if let Some(new_state_id) = state_replacements.get(&advance_action.1.state) { + advance_action.1.state = *new_state_id; + } + } + } + } + + let final_state_replacements = (0..table.states.len()).into_iter().map(|state_id| { + let replacement = state_replacements.get(&state_id).cloned().unwrap_or(state_id); + let prior_removed = state_replacements.iter().take_while(|i| *i.0 < replacement).count(); + replacement - prior_removed + }).collect::>(); + + for state in parse_table.states.iter_mut() { + state.lex_state_id = final_state_replacements[state.lex_state_id]; + } + + for state in table.states.iter_mut() { + for advance_action in state.advance_actions.iter_mut() { + advance_action.1.state = final_state_replacements[advance_action.1.state]; + } + } + + let mut i = 0; + table.states.retain(|_| { + let result = !state_replacements.contains_key(&i); + i += 1; + result + }); +} diff --git 
a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index c17261dc..ada34dff 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -7,8 +7,11 @@ use crate::tables::{ AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; -use std::collections::hash_map::{DefaultHasher, Entry}; -use std::collections::{HashMap, HashSet, VecDeque}; +use hashbrown::hash_map::Entry; +use hashbrown::{HashMap, HashSet}; +use std::collections::hash_map::DefaultHasher; +use std::collections::VecDeque; + use std::fmt::Write; use std::hash::Hasher; @@ -43,9 +46,10 @@ impl<'a> ParseTableBuilder<'a> { // Ensure that the empty alias sequence has index 0. self.parse_table.alias_sequences.push(Vec::new()); - // Ensure that the error state has index 0. + // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); + // Add the starting state at index 1. 
self.add_parse_state( &Vec::new(), &Vec::new(), @@ -61,6 +65,8 @@ impl<'a> ParseTableBuilder<'a> { self.process_part_state_queue()?; self.populate_used_symbols(); + self.remove_precedences(); + Ok((self.parse_table, self.following_tokens)) } @@ -112,28 +118,9 @@ impl<'a> ParseTableBuilder<'a> { fn process_part_state_queue(&mut self) -> Result<()> { while let Some(entry) = self.parse_state_queue.pop_front() { - let debug = false; - - if debug { - println!( - "ITEM SET {}:\n{}", - entry.state_id, - self.item_sets_by_state_id[entry.state_id] - .display_with(&self.syntax_grammar, &self.lexical_grammar,) - ); - } - let item_set = self .item_set_builder .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); - - if debug { - println!( - "TRANSITIVE CLOSURE:\n{}", - item_set.display_with(&self.syntax_grammar, &self.lexical_grammar) - ); - } - self.add_actions( entry.preceding_symbols, entry.preceding_auxiliary_symbols, @@ -527,6 +514,7 @@ impl<'a> ParseTableBuilder<'a> { } fn populate_used_symbols(&mut self) { + self.parse_table.symbols.push(Symbol::end()); let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()]; let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()]; let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()]; @@ -542,20 +530,39 @@ impl<'a> ParseTableBuilder<'a> { non_terminal_usages[symbol.index] = true; } } - self.parse_table.symbols.push(Symbol::end()); for (i, value) in terminal_usages.into_iter().enumerate() { if value { self.parse_table.symbols.push(Symbol::terminal(i)); } } + for (i, value) in external_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::external(i)); + } + } for (i, value) in non_terminal_usages.into_iter().enumerate() { if value { self.parse_table.symbols.push(Symbol::non_terminal(i)); } } - for (i, value) in external_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::external(i)); + } + 
+ fn remove_precedences(&mut self) { + for state in self.parse_table.states.iter_mut() { + for (_, entry) in state.terminal_entries.iter_mut() { + for action in entry.actions.iter_mut() { + match action { + ParseAction::Reduce { + precedence, + associativity, + .. + } => { + *precedence = 0; + *associativity = None; + } + _ => {} + } + } } } } diff --git a/src/build_tables/coincident_tokens.rs b/src/build_tables/coincident_tokens.rs index 10707489..5f2bb3ec 100644 --- a/src/build_tables/coincident_tokens.rs +++ b/src/build_tables/coincident_tokens.rs @@ -1,36 +1,44 @@ +use crate::grammars::LexicalGrammar; use crate::rules::Symbol; use crate::tables::{ParseStateId, ParseTable}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; pub(crate) struct CoincidentTokenIndex { - entries: HashMap<(Symbol, Symbol), HashSet>, - empty: HashSet, + entries: Vec>, + n: usize, } impl CoincidentTokenIndex { - pub fn new(table: &ParseTable) -> Self { - let mut entries = HashMap::new(); + pub fn new(table: &ParseTable, lexical_grammar: &LexicalGrammar) -> Self { + let n = lexical_grammar.variables.len(); + let mut result = Self { + n, + entries: vec![HashSet::new(); n * n], + }; for (i, state) in table.states.iter().enumerate() { for symbol in state.terminal_entries.keys() { for other_symbol in state.terminal_entries.keys() { - entries - .entry((*symbol, *other_symbol)) - .or_insert(HashSet::new()) - .insert(i); + let index = result.index(*symbol, *other_symbol); + result.entries[index].insert(i); } } } - Self { - entries, - empty: HashSet::new(), - } + result } pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet { - self.entries.get(&(a, b)).unwrap_or(&self.empty) + &self.entries[self.index(a, b)] } pub fn contains(&self, a: Symbol, b: Symbol) -> bool { - self.entries.contains_key(&(a, b)) + !self.entries[self.index(a, b)].is_empty() + } + + fn index(&self, a: Symbol, b: Symbol) -> usize { + if a.index < b.index { + a.index * self.n + b.index + } else { 
+ b.index * self.n + a.index + } } } diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 4cd2f643..511d7bef 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -112,7 +112,9 @@ impl LookaheadSet { return; } }; - vec.resize(other.index + 1, false); + if other.index >= vec.len() { + vec.resize(other.index + 1, false); + } vec.set(other.index, true); } diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 5e61bfcc..5714e7e2 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -1,7 +1,7 @@ use super::item::{LookaheadSet, ParseItem, ParseItemSet}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::rules::Symbol; -use std::collections::{HashMap, HashSet}; +use hashbrown::{HashMap, HashSet}; #[derive(Clone, Debug, PartialEq, Eq)] struct TransitiveClosureAddition<'a> { diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 8b3a2db4..207431dd 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -27,22 +27,14 @@ pub(crate) fn build_tables( let (mut parse_table, following_tokens) = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); - - eprintln!("{:?}", token_conflict_map); - - let coincident_token_index = CoincidentTokenIndex::new(&parse_table); - let keywords = if let Some(word_token) = syntax_grammar.word_token { - identify_keywords( - lexical_grammar, - &parse_table, - word_token, - &token_conflict_map, - &coincident_token_index, - ) - } else { - LookaheadSet::new() - }; - + let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar); + let keywords = identify_keywords( + lexical_grammar, + &parse_table, + syntax_grammar.word_token, + &token_conflict_map, + &coincident_token_index, + ); populate_error_state( &mut parse_table, syntax_grammar, @@ -123,10 +115,15 @@ fn 
populate_error_state( fn identify_keywords( lexical_grammar: &LexicalGrammar, parse_table: &ParseTable, - word_token: Symbol, + word_token: Option, token_conflict_map: &TokenConflictMap, coincident_token_index: &CoincidentTokenIndex, ) -> LookaheadSet { + if word_token.is_none() { + return LookaheadSet::new(); + } + + let word_token = word_token.unwrap(); let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new()); // First find all of the candidate keyword tokens: tokens that start with @@ -137,6 +134,7 @@ fn identify_keywords( if all_chars_are_alphabetical(&cursor) && token_conflict_map.does_match_same_string(i, word_token.index) { + info!("Keywords - add candidate {}", lexical_grammar.variables[i].name); Some(Symbol::terminal(i)) } else { None @@ -150,8 +148,8 @@ fn identify_keywords( if other_token != *token && token_conflict_map.does_match_same_string(token.index, other_token.index) { - eprintln!( - "Exclude {} from keywords because it matches the same string as {}", + info!( + "Keywords - exclude {} because it matches the same string as {}", lexical_grammar.variables[token.index].name, lexical_grammar.variables[other_token.index].name ); @@ -189,8 +187,8 @@ fn identify_keywords( word_token.index, other_index, ) { - eprintln!( - "Exclude {} from keywords because of conflict with {}", + info!( + "Keywords - exclude {} because of conflict with {}", lexical_grammar.variables[token.index].name, lexical_grammar.variables[other_index].name ); @@ -198,8 +196,8 @@ fn identify_keywords( } } - eprintln!( - "Include {} in keywords", + info!( + "Keywords - include {}", lexical_grammar.variables[token.index].name, ); true diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/shrink_parse_table.rs index b943158f..33b72c32 100644 --- a/src/build_tables/shrink_parse_table.rs +++ b/src/build_tables/shrink_parse_table.rs @@ -2,7 +2,7 @@ use super::token_conflicts::TokenConflictMap; use crate::grammars::{SyntaxGrammar, VariableType}; use 
crate::rules::{AliasMap, Symbol}; use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; -use std::collections::{HashMap, HashSet}; +use hashbrown::{HashMap, HashSet}; pub(crate) fn shrink_parse_table( parse_table: &mut ParseTable, @@ -240,6 +240,10 @@ fn can_add_entry_to_state( fn remove_unused_states(parse_table: &mut ParseTable) { let mut state_usage_map = vec![false; parse_table.states.len()]; + + state_usage_map[0] = true; + state_usage_map[1] = true; + for state in &parse_table.states { for referenced_state in state.referenced_states() { state_usage_map[referenced_state] = true; diff --git a/src/build_tables/token_conflicts.rs b/src/build_tables/token_conflicts.rs index 9f1c4426..18a80484 100644 --- a/src/build_tables/token_conflicts.rs +++ b/src/build_tables/token_conflicts.rs @@ -1,7 +1,7 @@ use crate::build_tables::item::LookaheadSet; use crate::grammars::LexicalGrammar; use crate::nfa::{CharacterSet, NfaCursor}; -use std::collections::HashSet; +use hashbrown::HashSet; use std::fmt; #[derive(Clone, Debug, Default, PartialEq, Eq)] diff --git a/src/grammars.rs b/src/grammars.rs index d23e8ca6..7f587a8c 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -1,6 +1,6 @@ use crate::nfa::Nfa; use crate::rules::{Alias, Associativity, Rule, Symbol}; -use std::collections::HashMap; +use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum VariableType { diff --git a/src/logger.rs b/src/logger.rs new file mode 100644 index 00000000..18df763d --- /dev/null +++ b/src/logger.rs @@ -0,0 +1,29 @@ +use log::{LevelFilter, Log, Metadata, Record}; + +struct Logger { + pub filter: Option, +} + +impl Log for Logger { + fn enabled(&self, _: &Metadata) -> bool { + true + } + + fn log(&self, record: &Record) { + eprintln!( + "[{}] {}", + record + .module_path() + .unwrap_or_default() + .trim_start_matches("rust_tree_sitter_cli::"), + record.args() + ); + } + + fn flush(&self) {} +} + +pub(crate) fn init() { + 
log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap(); + log::set_max_level(LevelFilter::Info); +} diff --git a/src/main.rs b/src/main.rs index cd672186..a08922b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,20 +1,23 @@ #[macro_use] -extern crate serde_derive; -#[macro_use] -extern crate serde_json; -#[macro_use] extern crate lazy_static; +#[macro_use] +extern crate log; +#[macro_use] +extern crate serde_derive; +extern crate hashbrown; +extern crate serde_json; -use std::path::PathBuf; use clap::{App, Arg, SubCommand}; use std::env; use std::io::Write; +use std::path::PathBuf; use std::process::{Command, Stdio}; mod build_tables; mod error; mod generate; mod grammars; +mod logger; mod nfa; mod parse_grammar; mod prepare_grammar; @@ -27,7 +30,11 @@ fn main() -> error::Result<()> { .version("0.1") .author("Max Brunsfeld ") .about("Generates and tests parsers") - .subcommand(SubCommand::with_name("generate").about("Generate a parser")) + .subcommand( + SubCommand::with_name("generate") + .about("Generate a parser") + .arg(Arg::with_name("log").long("log")), + ) .subcommand( SubCommand::with_name("parse") .about("Parse a file") @@ -42,7 +49,11 @@ fn main() -> error::Result<()> { ) .get_matches(); - if let Some(_) = matches.subcommand_matches("generate") { + if let Some(matches) = matches.subcommand_matches("generate") { + if matches.is_present("log") { + logger::init(); + } + let mut grammar_path = env::current_dir().expect("Failed to read CWD"); grammar_path.push("grammar.js"); let grammar_json = load_js_grammar_file(grammar_path); @@ -70,7 +81,8 @@ fn load_js_grammar_file(grammar_path: PathBuf) -> String { "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n", js_prelude, grammar_path.to_str().unwrap() - ).expect("Failed to write to node's stdin"); + ) + .expect("Failed to write to node's stdin"); drop(node_stdin); let output = node_process .wait_with_output() diff --git a/src/nfa.rs b/src/nfa.rs index e14dac44..1c7ff53b 100644 --- 
a/src/nfa.rs +++ b/src/nfa.rs @@ -320,6 +320,10 @@ impl<'a> NfaCursor<'a> { self.add_states(&mut states); } + pub fn force_reset(&mut self, states: Vec) { + self.state_ids = states + } + pub fn successors(&self) -> impl Iterator { self.state_ids.iter().filter_map(move |id| { if let NfaState::Advance { @@ -352,16 +356,26 @@ impl<'a> NfaCursor<'a> { result[i].1 = max(result[i].1, prec); result[i].2.push(state); result[i].3 |= is_sep; - } else { - let intersection = result[i].0.remove_intersection(&mut chars); - if !intersection.is_empty() { - let mut states = result[i].2.clone(); - states.push(state); + chars = CharacterSet::empty(); + break; + } + + let intersection = result[i].0.remove_intersection(&mut chars); + if !intersection.is_empty() { + let mut states = result[i].2.clone(); + let max_prec = max(result[i].1, prec); + states.push(state); + if result[i].0.is_empty() { + result[i].0 = intersection; + result[i].1 = max_prec; + result[i].2 = states; + result[i].3 |= is_sep; + } else { result.insert( i, ( intersection, - max(result[i].1, prec), + max_prec, states, result[i].3 || is_sep, ), diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs index 07396329..6808f402 100644 --- a/src/parse_grammar.rs +++ b/src/parse_grammar.rs @@ -133,7 +133,7 @@ mod tests { #[test] fn test_parse_grammar() { - let grammar = parse_grammar(&json!({ + let grammar = parse_grammar(r#"{ "name": "my_lang", "rules": { "file": { @@ -148,7 +148,7 @@ mod tests { "value": "foo" } } - }).to_string()).unwrap(); + }"#).unwrap(); assert_eq!(grammar.name, "my_lang"); assert_eq!(grammar.variables, vec![ diff --git a/src/prepare_grammar/expand_repeats.rs b/src/prepare_grammar/expand_repeats.rs index f3811c5f..4589bd11 100644 --- a/src/prepare_grammar/expand_repeats.rs +++ b/src/prepare_grammar/expand_repeats.rs @@ -1,7 +1,7 @@ use super::ExtractedSyntaxGrammar; use crate::grammars::{Variable, VariableType}; use crate::rules::{Rule, Symbol}; -use std::collections::HashMap; +use hashbrown::HashMap; 
use std::mem; struct Expander { diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs index 5f3f6e16..115933ee 100644 --- a/src/prepare_grammar/extract_tokens.rs +++ b/src/prepare_grammar/extract_tokens.rs @@ -2,7 +2,7 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar}; use crate::error::{Error, Result}; use crate::grammars::{ExternalToken, Variable, VariableType}; use crate::rules::{MetadataParams, Rule, Symbol, SymbolType}; -use std::collections::HashMap; +use hashbrown::HashMap; use std::mem; pub(super) fn extract_tokens( diff --git a/src/prepare_grammar/process_inlines.rs b/src/prepare_grammar/process_inlines.rs index 0d7f6827..24bbc14d 100644 --- a/src/prepare_grammar/process_inlines.rs +++ b/src/prepare_grammar/process_inlines.rs @@ -1,5 +1,5 @@ use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar}; -use std::collections::HashMap; +use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct ProductionStepId { diff --git a/src/render/mod.rs b/src/render/mod.rs index cbb8ba0d..250218c1 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,9 +1,9 @@ use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; use crate::nfa::CharacterSet; use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; -use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; +use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; use core::ops::Range; -use std::collections::{HashMap, HashSet}; +use hashbrown::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; @@ -372,17 +372,14 @@ impl Generator { if self.add_character_set_condition(&characters, &ruled_out_characters) { add!(self, ")\n"); indent!(self); - if action.in_main_token { - add_line!(self, "ADVANCE({});", action.state); - } else { - add_line!(self, "SKIP({});", action.state); - } + 
self.add_advance_action(&action); if let CharacterSet::Include(chars) = characters { ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); } dedent!(self); } else { self.buffer.truncate(previous_length); + self.add_advance_action(&action); } } @@ -494,6 +491,14 @@ impl Generator { }) } + fn add_advance_action(&mut self, action: &AdvanceAction) { + if action.in_main_token { + add_line!(self, "ADVANCE({});", action.state); + } else { + add_line!(self, "SKIP({});", action.state); + } + } + fn add_lex_modes_list(&mut self) { self.get_external_scanner_state_id(HashSet::new()); diff --git a/src/rules.rs b/src/rules.rs index 77e50d3c..ad16c632 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum SymbolType { diff --git a/src/tables.rs b/src/tables.rs index 1c125621..21222135 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1,6 +1,6 @@ use crate::nfa::CharacterSet; use crate::rules::{Alias, Associativity, Symbol}; -use std::collections::HashMap; +use hashbrown::HashMap; pub(crate) type AliasSequenceId = usize; pub(crate) type ParseStateId = usize; From 92d4fe419c291f48233a8cbcd5073111e2ebfaa7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 10:30:59 -0800 Subject: [PATCH 083/208] Fix character set intersection bugs --- src/nfa.rs | 159 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 125 insertions(+), 34 deletions(-) diff --git a/src/nfa.rs b/src/nfa.rs index 1c7ff53b..b746200f 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -149,14 +149,18 @@ impl CharacterSet { CharacterSet::Include(removed) } CharacterSet::Exclude(other_chars) => { - let removed = remove_chars(chars, other_chars, true); + let mut result_exclusion = chars.clone(); + result_exclusion.extend(other_chars.iter().cloned()); + result_exclusion.sort_unstable(); + result_exclusion.dedup(); + remove_chars(chars, 
other_chars, true); let mut included_characters = Vec::new(); let mut other_included_characters = Vec::new(); swap(&mut included_characters, other_chars); swap(&mut other_included_characters, chars); *self = CharacterSet::Include(included_characters); *other = CharacterSet::Include(other_included_characters); - CharacterSet::Exclude(removed) + CharacterSet::Exclude(result_exclusion) } }, } @@ -351,35 +355,24 @@ impl<'a> NfaCursor<'a> { for (chars, prec, state, is_sep) in iter { let mut chars = chars.clone(); let mut i = 0; - while i < result.len() { - if result[i].0 == chars { - result[i].1 = max(result[i].1, prec); - result[i].2.push(state); - result[i].3 |= is_sep; - chars = CharacterSet::empty(); - break; - } - + while i < result.len() && !chars.is_empty() { let intersection = result[i].0.remove_intersection(&mut chars); if !intersection.is_empty() { - let mut states = result[i].2.clone(); - let max_prec = max(result[i].1, prec); - states.push(state); + let mut intersection_states = result[i].2.clone(); + match intersection_states.binary_search(&state) { + Err(j) => intersection_states.insert(j, state), + _ => {} + } + let intersection_entry = ( + intersection, + max(result[i].1, prec), + intersection_states, + result[i].3 || is_sep, + ); if result[i].0.is_empty() { - result[i].0 = intersection; - result[i].1 = max_prec; - result[i].2 = states; - result[i].3 |= is_sep; + result[i] = intersection_entry; } else { - result.insert( - i, - ( - intersection, - max_prec, - states, - result[i].3 || is_sep, - ), - ); + result.insert(i, intersection_entry); i += 1; } } @@ -444,6 +437,7 @@ mod tests { #[test] fn test_group_successors() { let table = [ + // overlapping character classes ( vec![ (CharacterSet::empty().add_range('a', 'f'), 0, 1, false), @@ -460,6 +454,7 @@ mod tests { (CharacterSet::empty().add_range('g', 'i'), 1, vec![2], false), ], ), + // large character class followed by many individual characters ( vec![ (CharacterSet::empty().add_range('a', 'z'), 0, 1, 
false), @@ -483,6 +478,63 @@ mod tests { ), ], ), + // negated character class followed by an individual character + ( + vec![ + (CharacterSet::empty().add_char('0'), 0, 1, false), + (CharacterSet::empty().add_char('b'), 0, 2, false), + ( + CharacterSet::empty().add_range('a', 'f').negate(), + 0, + 3, + false, + ), + (CharacterSet::empty().add_char('c'), 0, 4, false), + ], + vec![ + (CharacterSet::empty().add_char('0'), 0, vec![1, 3], false), + (CharacterSet::empty().add_char('b'), 0, vec![2], false), + (CharacterSet::empty().add_char('c'), 0, vec![4], false), + ( + CharacterSet::empty() + .add_range('a', 'f') + .add_char('0') + .negate(), + 0, + vec![3], + false, + ), + ], + ), + // multiple negated character classes + ( + vec![ + (CharacterSet::Include(vec!['a']), 0, 1, false), + (CharacterSet::Exclude(vec!['a', 'b', 'c']), 0, 2, false), + (CharacterSet::Include(vec!['g']), 0, 6, false), + (CharacterSet::Exclude(vec!['d', 'e', 'f']), 0, 3, false), + (CharacterSet::Exclude(vec!['g', 'h', 'i']), 0, 4, false), + (CharacterSet::Include(vec!['g']), 0, 5, false), + ], + vec![ + (CharacterSet::Include(vec!['a']), 0, vec![1, 3, 4], false), + (CharacterSet::Include(vec!['g']), 0, vec![2, 3, 5, 6], false), + (CharacterSet::Include(vec!['b', 'c']), 0, vec![3, 4], false), + (CharacterSet::Include(vec!['h', 'i']), 0, vec![2, 3], false), + ( + CharacterSet::Include(vec!['d', 'e', 'f']), + 0, + vec![2, 4], + false, + ), + ( + CharacterSet::Exclude(vec!['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']), + 0, + vec![2, 3, 4], + false, + ), + ], + ), ]; for row in table.iter() { @@ -495,8 +547,8 @@ mod tests { #[test] fn test_character_set_remove_intersection() { - // whitelist - whitelist - // both sets contain 'c', 'd', and 'f' + // A whitelist and an overlapping whitelist. 
+ // Both sets contain 'c', 'd', and 'f' let mut a = CharacterSet::empty().add_range('a', 'f'); let mut b = CharacterSet::empty().add_range('c', 'h'); assert_eq!( @@ -515,8 +567,37 @@ mod tests { assert_eq!(a, CharacterSet::empty().add_range('a', 'b')); assert_eq!(b, CharacterSet::empty().add_range('g', 'h')); - // whitelist - blacklist - // both sets contain 'e', 'f', and 'm' + // A whitelist and a larger whitelist. + let mut a = CharacterSet::empty().add_char('c'); + let mut b = CharacterSet::empty().add_range('a', 'e'); + assert_eq!( + a.remove_intersection(&mut b), + CharacterSet::empty().add_char('c') + ); + assert_eq!(a, CharacterSet::empty()); + assert_eq!( + b, + CharacterSet::empty() + .add_range('a', 'b') + .add_range('d', 'e') + ); + + let mut a = CharacterSet::empty().add_char('c'); + let mut b = CharacterSet::empty().add_range('a', 'e'); + assert_eq!( + b.remove_intersection(&mut a), + CharacterSet::empty().add_char('c') + ); + assert_eq!(a, CharacterSet::empty()); + assert_eq!( + b, + CharacterSet::empty() + .add_range('a', 'b') + .add_range('d', 'e') + ); + + // A whitelist and an intersecting blacklist. + // Both sets contain 'e', 'f', and 'm' let mut a = CharacterSet::empty() .add_range('c', 'h') .add_range('k', 'm'); @@ -545,16 +626,26 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - // blacklist - blacklist - // both sets exclude 'c', 'd', and 'e' + // A blacklist and an overlapping blacklist. 
+ // Both sets exclude 'c', 'd', and 'e' let mut a = CharacterSet::empty().add_range('a', 'e').negate(); let mut b = CharacterSet::empty().add_range('c', 'h').negate(); assert_eq!( a.remove_intersection(&mut b), - CharacterSet::Exclude(vec!['c', 'd', 'e']) + CharacterSet::empty().add_range('a', 'h').negate(), ); assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); + + // A blacklist and a larger blacklist. + let mut a = CharacterSet::empty().add_range('b', 'c').negate(); + let mut b = CharacterSet::empty().add_range('a', 'd').negate(); + assert_eq!( + a.remove_intersection(&mut b), + CharacterSet::empty().add_range('a', 'd').negate(), + ); + assert_eq!(a, CharacterSet::empty().add_char('a').add_char('d')); + assert_eq!(b, CharacterSet::empty()); } #[test] From 82fda8929e0019f6ba676f659677e84000ae1632 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 10:31:14 -0800 Subject: [PATCH 084/208] Add EOF actions to lex table --- src/build_tables/build_lex_table.rs | 97 +++++++++++++++++++++------ src/build_tables/coincident_tokens.rs | 11 +-- src/render/mod.rs | 19 +++--- src/rules.rs | 4 ++ src/tables.rs | 2 +- 5 files changed, 96 insertions(+), 37 deletions(-) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index c002f427..66a4fe43 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -1,7 +1,8 @@ use super::item::LookaheadSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::nfa::NfaCursor; +use crate::nfa::{CharacterSet, NfaCursor}; +use crate::rules::Symbol; use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable}; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, VecDeque}; @@ -23,7 +24,6 @@ pub(crate) fn build_lex_table( let mut builder = LexTableBuilder::new(lexical_grammar); for (i, state) in 
parse_table.states.iter_mut().enumerate() { - info!("populate lex state for parse state {}", i); let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| { if token.is_terminal() { if keywords.contains(&token) { @@ -31,10 +31,13 @@ pub(crate) fn build_lex_table( } else { Some(*token) } + } else if token.is_eof() { + Some(*token) } else { None } })); + info!("populate lex state for parse state {}", i); state.lex_state_id = builder.add_state_for_tokens(&tokens); } @@ -44,12 +47,18 @@ pub(crate) fn build_lex_table( (table, keyword_lex_table) } +struct QueueEntry { + state_id: usize, + nfa_states: Vec, + eof_valid: bool, +} + struct LexTableBuilder<'a> { lexical_grammar: &'a LexicalGrammar, cursor: NfaCursor<'a>, table: LexTable, - state_queue: VecDeque<(usize, Vec)>, - state_ids_by_nfa_state_set: HashMap, usize>, + state_queue: VecDeque, + state_ids_by_nfa_state_set: HashMap<(Vec, bool), usize>, } impl<'a> LexTableBuilder<'a> { @@ -64,11 +73,19 @@ impl<'a> LexTableBuilder<'a> { } fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize { + let mut eof_valid = false; let nfa_states = tokens .iter() - .map(|token| self.lexical_grammar.variables[token.index].start_state) + .filter_map(|token| { + if token.is_terminal() { + Some(self.lexical_grammar.variables[token.index].start_state) + } else { + eof_valid = true; + None + } + }) .collect(); - let (state_id, is_new) = self.add_state(nfa_states); + let (state_id, is_new) = self.add_state(nfa_states, eof_valid); if is_new { info!( @@ -81,32 +98,42 @@ impl<'a> LexTableBuilder<'a> { ); } - while let Some((state_id, nfa_states)) = self.state_queue.pop_back() { - self.populate_state(state_id, nfa_states); + while let Some(QueueEntry { + state_id, + nfa_states, + eof_valid, + }) = self.state_queue.pop_front() + { + self.populate_state(state_id, nfa_states, eof_valid); } state_id } - fn add_state(&mut self, nfa_states: Vec) -> (usize, bool) { + fn add_state(&mut self, nfa_states: Vec, eof_valid: 
bool) -> (usize, bool) { self.cursor.reset(nfa_states); match self .state_ids_by_nfa_state_set - .entry(self.cursor.state_ids.clone()) + .entry((self.cursor.state_ids.clone(), eof_valid)) { Entry::Occupied(o) => (*o.get(), false), Entry::Vacant(v) => { let state_id = self.table.states.len(); self.table.states.push(LexState::default()); - self.state_queue.push_back((state_id, v.key().clone())); + self.state_queue.push_back(QueueEntry { + state_id, + nfa_states: v.key().0.clone(), + eof_valid, + }); v.insert(state_id); (state_id, true) } } } - fn populate_state(&mut self, state_id: usize, nfa_states: Vec) { + fn populate_state(&mut self, state_id: usize, nfa_states: Vec, eof_valid: bool) { self.cursor.force_reset(nfa_states); + // The EOF state is represented as an empty list of NFA states. let mut completion = None; for (id, prec) in self.cursor.completions() { if let Some((prev_id, prev_precedence)) = completion { @@ -121,7 +148,24 @@ impl<'a> LexTableBuilder<'a> { completion = Some((id, prec)); } - for (chars, advance_precedence, next_states, is_sep) in self.cursor.grouped_successors() { + info!("raw successors: {:?}", self.cursor.successors().collect::>()); + let successors = self.cursor.grouped_successors(); + + // If EOF is a valid lookahead token, add a transition predicated on the null + // character that leads to the empty set of NFA states. 
+ if eof_valid { + let (next_state_id, _) = self.add_state(Vec::new(), false); + info!("populate state: {}, character: EOF", state_id); + self.table.states[state_id].advance_actions.push(( + CharacterSet::empty().add_char('\0'), + AdvanceAction { + state: next_state_id, + in_main_token: true, + }, + )); + } + + for (chars, advance_precedence, next_states, is_sep) in successors { info!( "populate state: {}, characters: {:?}, precedence: {:?}", state_id, chars, advance_precedence @@ -131,7 +175,7 @@ impl<'a> LexTableBuilder<'a> { continue; } } - let (next_state_id, _) = self.add_state(next_states); + let (next_state_id, _) = self.add_state(next_states, eof_valid && is_sep); self.table.states[state_id].advance_actions.push(( chars, AdvanceAction { @@ -141,8 +185,10 @@ impl<'a> LexTableBuilder<'a> { )); } - if let Some((completion_index, _)) = completion { - self.table.states[state_id].accept_action = Some(completion_index); + if let Some((complete_id, _)) = completion { + self.table.states[state_id].accept_action = Some(Symbol::terminal(complete_id)); + } else if self.cursor.state_ids.is_empty() { + self.table.states[state_id].accept_action = Some(Symbol::end()); } } } @@ -179,11 +225,20 @@ fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { } } - let final_state_replacements = (0..table.states.len()).into_iter().map(|state_id| { - let replacement = state_replacements.get(&state_id).cloned().unwrap_or(state_id); - let prior_removed = state_replacements.iter().take_while(|i| *i.0 < replacement).count(); - replacement - prior_removed - }).collect::>(); + let final_state_replacements = (0..table.states.len()) + .into_iter() + .map(|state_id| { + let replacement = state_replacements + .get(&state_id) + .cloned() + .unwrap_or(state_id); + let prior_removed = state_replacements + .iter() + .take_while(|i| *i.0 < replacement) + .count(); + replacement - prior_removed + }) + .collect::>(); for state in parse_table.states.iter_mut() { state.lex_state_id = 
final_state_replacements[state.lex_state_id]; diff --git a/src/build_tables/coincident_tokens.rs b/src/build_tables/coincident_tokens.rs index 5f2bb3ec..ac5931e1 100644 --- a/src/build_tables/coincident_tokens.rs +++ b/src/build_tables/coincident_tokens.rs @@ -1,10 +1,9 @@ use crate::grammars::LexicalGrammar; use crate::rules::Symbol; use crate::tables::{ParseStateId, ParseTable}; -use std::collections::HashSet; pub(crate) struct CoincidentTokenIndex { - entries: Vec>, + entries: Vec>, n: usize, } @@ -13,20 +12,22 @@ impl CoincidentTokenIndex { let n = lexical_grammar.variables.len(); let mut result = Self { n, - entries: vec![HashSet::new(); n * n], + entries: vec![Vec::new(); n * n], }; for (i, state) in table.states.iter().enumerate() { for symbol in state.terminal_entries.keys() { for other_symbol in state.terminal_entries.keys() { let index = result.index(*symbol, *other_symbol); - result.entries[index].insert(i); + if result.entries[index].last().cloned() != Some(i) { + result.entries[index].push(i); + } } } } result } - pub fn states_with(&self, a: Symbol, b: Symbol) -> &HashSet { + pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec { &self.entries[self.index(a, b)] } diff --git a/src/render/mod.rs b/src/render/mod.rs index 250218c1..624fa1e0 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -125,7 +125,7 @@ impl Generator { .symbols .iter() .filter(|symbol| { - if symbol.is_terminal() { + if symbol.is_terminal() || symbol.is_eof() { true } else if symbol.is_external() { self.syntax_grammar.external_tokens[symbol.index] @@ -359,7 +359,7 @@ impl Generator { add_line!( self, "ACCEPT_TOKEN({})", - self.symbol_ids[&Symbol::terminal(accept_action)] + self.symbol_ids[&accept_action] ); } @@ -462,18 +462,16 @@ impl Generator { let mut prev_range: Option> = None; chars .iter() - .cloned() - .chain(Some('\0')) - .filter_map(move |c| { + .map(|c| (*c, false)) + .chain(Some(('\0', true))) + .filter_map(move |(c, done)| { + if done { + return 
prev_range.clone(); + } if ruled_out_characters.contains(&(c as u32)) { return None; } if let Some(range) = prev_range.clone() { - if c == '\0' { - prev_range = Some(c..c); - return Some(range); - } - let mut prev_range_successor = range.end as u32 + 1; while prev_range_successor < c as u32 { if !ruled_out_characters.contains(&prev_range_successor) { @@ -948,6 +946,7 @@ impl Generator { fn add_character(&mut self, c: char) { if c.is_ascii() { match c { + '\0' => add!(self, "'\\0'"), '\'' => add!(self, "'\\''"), '\\' => add!(self, "'\\\\'"), '\t' => add!(self, "'\\t'"), diff --git a/src/rules.rs b/src/rules.rs index ad16c632..bd0340fc 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -162,6 +162,10 @@ impl Symbol { self.kind == SymbolType::External } + pub fn is_eof(&self) -> bool { + self.kind == SymbolType::End + } + pub fn non_terminal(index: usize) -> Self { Symbol { kind: SymbolType::NonTerminal, diff --git a/src/tables.rs b/src/tables.rs index 21222135..f400d25c 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -55,7 +55,7 @@ pub(crate) struct AdvanceAction { #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct LexState { pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, - pub accept_action: Option, + pub accept_action: Option, } #[derive(Debug, PartialEq, Eq)] From 02ca84fb4ae339753f2742d69017bdb7c39dda44 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 11:52:45 -0800 Subject: [PATCH 085/208] Add missing ';' in generated code --- src/render/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/render/mod.rs b/src/render/mod.rs index 624fa1e0..dd046c93 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -358,7 +358,7 @@ impl Generator { if let Some(accept_action) = state.accept_action { add_line!( self, - "ACCEPT_TOKEN({})", + "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action] ); } From c0f48dff6f3128d94855826e63588847dfcabb61 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 
11:52:57 -0800 Subject: [PATCH 086/208] Fix incorrect NFA generation for string rules --- src/build_tables/build_lex_table.rs | 6 +-- src/prepare_grammar/expand_tokens.rs | 63 +++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 66a4fe43..6cd9a1ce 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -148,8 +148,8 @@ impl<'a> LexTableBuilder<'a> { completion = Some((id, prec)); } - info!("raw successors: {:?}", self.cursor.successors().collect::>()); let successors = self.cursor.grouped_successors(); + info!("populate state: {}, successors: {:?}", state_id, successors); // If EOF is a valid lookahead token, add a transition predicated on the null // character that leads to the empty set of NFA states. @@ -166,10 +166,6 @@ impl<'a> LexTableBuilder<'a> { } for (chars, advance_precedence, next_states, is_sep) in successors { - info!( - "populate state: {}, characters: {:?}, precedence: {:?}", - state_id, chars, advance_precedence - ); if let Some((_, completed_precedence)) = completion { if advance_precedence < completed_precedence { continue; diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index fdf085f6..61b1897c 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -89,7 +89,8 @@ impl NfaBuilder { } Rule::String(s) => { for c in s.chars().rev() { - self.push_advance(CharacterSet::empty().add_char(c), self.nfa.last_state_id()); + self.push_advance(CharacterSet::empty().add_char(c), next_state_id); + next_state_id = self.nfa.last_state_id(); } Ok(s.len() > 0) } @@ -102,6 +103,8 @@ impl NfaBuilder { alternative_state_ids.push(next_state_id); } } + alternative_state_ids.sort_unstable(); + alternative_state_ids.dedup(); alternative_state_ids.retain(|i| *i != self.nfa.last_state_id()); for alternative_state_id in 
alternative_state_ids { self.push_split(alternative_state_id); @@ -542,6 +545,64 @@ mod tests { ("aeeeef", Some((2, "aeeee"))), ], }, + Row { + rules: vec![ + Rule::seq(vec![ + Rule::string("a"), + Rule::choice(vec![ + Rule::string("b"), + Rule::string("c"), + ]), + Rule::string("d"), + ]) + ], + separators: vec![], + examples: vec![ + ("abd", Some((0, "abd"))), + ("acd", Some((0, "acd"))), + ("abc", None), + ("ad", None), + ("d", None), + ("a", None), + ] + }, + // nested choices within sequences + Row { + rules: vec![ + Rule::seq(vec![ + Rule::pattern("[0-9]+"), + Rule::choice(vec![ + Rule::Blank, + Rule::choice(vec![ + Rule::seq(vec![ + Rule::choice(vec![ + Rule::string("e"), + Rule::string("E") + ]), + Rule::choice(vec![ + Rule::Blank, + Rule::choice(vec![ + Rule::string("+"), + Rule::string("-"), + ]) + ]), + Rule::pattern("[0-9]+"), + ]) + ]) + ]), + ]), + ], + separators: vec![], + examples: vec![ + ("12", Some((0, "12"))), + ("12e", Some((0, "12"))), + ("12g", Some((0, "12"))), + ("12e3", Some((0, "12e3"))), + ("12e+", Some((0, "12"))), + ("12E+34 +", Some((0, "12E+34"))), + ("12e34", Some((0, "12e34"))), + ], + }, ]; for Row { From 70f00d1a1e2e82582c576605d7f3e10c01345511 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 13:49:37 -0800 Subject: [PATCH 087/208] Give immediate tokens higher implicit precedence than other tokens --- src/build_tables/token_conflicts.rs | 17 ++++++++--------- src/grammars.rs | 2 +- src/prepare_grammar/expand_tokens.rs | 16 +++++++++++----- src/prepare_grammar/extract_simple_aliases.rs | 6 +++--- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/build_tables/token_conflicts.rs b/src/build_tables/token_conflicts.rs index 18a80484..91edadec 100644 --- a/src/build_tables/token_conflicts.rs +++ b/src/build_tables/token_conflicts.rs @@ -2,6 +2,7 @@ use crate::build_tables::item::LookaheadSet; use crate::grammars::LexicalGrammar; use crate::nfa::{CharacterSet, NfaCursor}; use hashbrown::HashSet; +use 
std::cmp::Ordering; use std::fmt; #[derive(Clone, Debug, Default, PartialEq, Eq)] @@ -71,16 +72,14 @@ impl<'a> TokenConflictMap<'a> { return false; } - match ( - grammar.variables[left.1].is_string, - grammar.variables[right.1].is_string, - ) { - (true, false) => return true, - (false, true) => return false, - _ => {} + match grammar.variables[left.1] + .implicit_precedence + .cmp(&grammar.variables[right.1].implicit_precedence) + { + Ordering::Less => false, + Ordering::Greater => true, + Ordering::Equal => left.1 < right.1, } - - left.0 < right.0 } } diff --git a/src/grammars.rs b/src/grammars.rs index 7f587a8c..f82d6b02 100644 --- a/src/grammars.rs +++ b/src/grammars.rs @@ -36,7 +36,7 @@ pub(crate) struct InputGrammar { pub(crate) struct LexicalVariable { pub name: String, pub kind: VariableType, - pub is_string: bool, + pub implicit_precedence: i32, pub start_state: u32, } diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 61b1897c..6520c432 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -14,11 +14,17 @@ struct NfaBuilder { precedence_stack: Vec, } -fn is_string(rule: &Rule) -> bool { +fn get_implicit_precedence(rule: &Rule) -> i32 { match rule { - Rule::String(_) => true, - Rule::Metadata { rule, .. 
} => is_string(rule), - _ => false, + Rule::String(_) => 1, + Rule::Metadata { rule, params } => { + if params.is_main_token { + get_implicit_precedence(rule) + 2 + } else { + get_implicit_precedence(rule) + } + } + _ => 0, } } @@ -67,7 +73,7 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result Date: Thu, 3 Jan 2019 13:49:50 -0800 Subject: [PATCH 088/208] Fix logic for identifying error recovery tokens --- src/build_tables/coincident_tokens.rs | 46 ++- src/build_tables/mod.rs | 54 ++- src/build_tables/shrink_parse_table.rs | 464 +++++++++++++------------ 3 files changed, 311 insertions(+), 253 deletions(-) diff --git a/src/build_tables/coincident_tokens.rs b/src/build_tables/coincident_tokens.rs index ac5931e1..62295073 100644 --- a/src/build_tables/coincident_tokens.rs +++ b/src/build_tables/coincident_tokens.rs @@ -1,23 +1,26 @@ use crate::grammars::LexicalGrammar; use crate::rules::Symbol; use crate::tables::{ParseStateId, ParseTable}; +use std::fmt; -pub(crate) struct CoincidentTokenIndex { +pub(crate) struct CoincidentTokenIndex<'a> { entries: Vec>, + grammar: &'a LexicalGrammar, n: usize, } -impl CoincidentTokenIndex { - pub fn new(table: &ParseTable, lexical_grammar: &LexicalGrammar) -> Self { +impl<'a> CoincidentTokenIndex<'a> { + pub fn new(table: &ParseTable, lexical_grammar: &'a LexicalGrammar) -> Self { let n = lexical_grammar.variables.len(); let mut result = Self { n, + grammar: lexical_grammar, entries: vec![Vec::new(); n * n], }; for (i, state) in table.states.iter().enumerate() { for symbol in state.terminal_entries.keys() { for other_symbol in state.terminal_entries.keys() { - let index = result.index(*symbol, *other_symbol); + let index = result.index(symbol.index, other_symbol.index); if result.entries[index].last().cloned() != Some(i) { result.entries[index].push(i); } @@ -28,18 +31,41 @@ impl CoincidentTokenIndex { } pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec { - &self.entries[self.index(a, b)] + 
&self.entries[self.index(a.index, b.index)] } pub fn contains(&self, a: Symbol, b: Symbol) -> bool { - !self.entries[self.index(a, b)].is_empty() + !self.entries[self.index(a.index, b.index)].is_empty() } - fn index(&self, a: Symbol, b: Symbol) -> usize { - if a.index < b.index { - a.index * self.n + b.index + fn index(&self, a: usize, b: usize) -> usize { + if a < b { + a * self.n + b } else { - b.index * self.n + a.index + b * self.n + a } } } + +impl<'a> fmt::Debug for CoincidentTokenIndex<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "CoincidentTokenIndex {{\n")?; + + write!(f, " entries: {{\n")?; + for i in 0..self.n { + write!(f, " {}: {{\n", self.grammar.variables[i].name)?; + for j in 0..self.n { + write!( + f, + " {}: {:?},\n", + self.grammar.variables[j].name, + self.entries[self.index(i, j)].len() + )?; + } + write!(f, " }},\n")?; + } + write!(f, " }},")?; + write!(f, "}}")?; + Ok(()) + } +} diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 207431dd..84659600 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -47,6 +47,7 @@ pub(crate) fn build_tables( syntax_grammar, simple_aliases, &token_conflict_map, + &keywords, ); let (main_lex_table, keyword_lex_table) = build_lex_table(&mut parse_table, syntax_grammar, lexical_grammar, &keywords); @@ -67,15 +68,22 @@ fn populate_error_state( ) { let state = &mut parse_table.states[0]; let n = lexical_grammar.variables.len(); + + // First identify the *conflict-free tokens*: tokens that do not overlap with + // any other token in any way. 
let conflict_free_tokens = LookaheadSet::with((0..n).into_iter().filter_map(|i| { - let conflicts_with_other_tokens = (0..n).into_iter().all(|j| { - j == i - || coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) - || !token_conflict_map.does_conflict(i, j) + let conflicts_with_other_tokens = (0..n).into_iter().any(|j| { + j != i + && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) + && token_conflict_map.does_conflict(i, j) }); if conflicts_with_other_tokens { None } else { + info!( + "error recovery - token {} has no conflicts", + lexical_grammar.variables[i].name + ); Some(Symbol::terminal(i)) } })); @@ -85,19 +93,32 @@ fn populate_error_state( actions: vec![ParseAction::Recover], }; + // Exclude from the error-recovery state any token that conflicts with one of + // the *conflict-free tokens* identified above. for i in 0..n { let symbol = Symbol::terminal(i); - let can_be_used_for_recovery = conflict_free_tokens.contains(&symbol) - || conflict_free_tokens.iter().all(|t| { - coincident_token_index.contains(symbol, t) - || !token_conflict_map.does_conflict(i, t.index) - }); - if can_be_used_for_recovery { - state - .terminal_entries - .entry(symbol) - .or_insert_with(|| recover_entry.clone()); + if !conflict_free_tokens.contains(&symbol) { + if syntax_grammar.word_token != Some(symbol) { + if let Some(t) = conflict_free_tokens.iter().find(|t| { + !coincident_token_index.contains(symbol, *t) + && token_conflict_map.does_conflict(symbol.index, t.index) + }) { + info!( + "error recovery - exclude token {} because of conflict with {}", + lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name + ); + continue; + } + } } + info!( + "error recovery - include token {}", + lexical_grammar.variables[i].name + ); + state + .terminal_entries + .entry(symbol) + .or_insert_with(|| recover_entry.clone()); } for (i, external_token) in syntax_grammar.external_tokens.iter().enumerate() { @@ -134,7 +155,10 @@ fn 
identify_keywords( if all_chars_are_alphabetical(&cursor) && token_conflict_map.does_match_same_string(i, word_token.index) { - info!("Keywords - add candidate {}", lexical_grammar.variables[i].name); + info!( + "Keywords - add candidate {}", + lexical_grammar.variables[i].name + ); Some(Symbol::terminal(i)) } else { None diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/shrink_parse_table.rs index 33b72c32..64a4b259 100644 --- a/src/build_tables/shrink_parse_table.rs +++ b/src/build_tables/shrink_parse_table.rs @@ -1,3 +1,4 @@ +use super::item::LookaheadSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{SyntaxGrammar, VariableType}; use crate::rules::{AliasMap, Symbol}; @@ -9,265 +10,272 @@ pub(crate) fn shrink_parse_table( syntax_grammar: &SyntaxGrammar, simple_aliases: &AliasMap, token_conflict_map: &TokenConflictMap, + keywords: &LookaheadSet, ) { - remove_unit_reductions(parse_table, syntax_grammar, simple_aliases); - merge_compatible_states(parse_table, syntax_grammar, token_conflict_map); - remove_unused_states(parse_table); + let mut optimizer = Optimizer { + parse_table, + syntax_grammar, + token_conflict_map, + keywords, + simple_aliases, + }; + optimizer.remove_unit_reductions(); + optimizer.merge_compatible_states(); + optimizer.remove_unused_states(); } -fn remove_unit_reductions( - parse_table: &mut ParseTable, - syntax_grammar: &SyntaxGrammar, - simple_aliases: &AliasMap, -) { - let mut aliased_symbols = HashSet::new(); - for variable in &syntax_grammar.variables { - for production in &variable.productions { - for step in &production.steps { - if step.alias.is_some() { - aliased_symbols.insert(step.symbol); +struct Optimizer<'a> { + parse_table: &'a mut ParseTable, + syntax_grammar: &'a SyntaxGrammar, + token_conflict_map: &'a TokenConflictMap<'a>, + keywords: &'a LookaheadSet, + simple_aliases: &'a AliasMap, +} + +impl<'a> Optimizer<'a> { + fn remove_unit_reductions(&mut self) { + let mut aliased_symbols = 
HashSet::new(); + for variable in &self.syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if step.alias.is_some() { + aliased_symbols.insert(step.symbol); + } } } } + + let mut unit_reduction_symbols_by_state = HashMap::new(); + for (i, state) in self.parse_table.states.iter().enumerate() { + let mut only_unit_reductions = true; + let mut unit_reduction_symbol = None; + for (_, entry) in &state.terminal_entries { + for action in &entry.actions { + match action { + ParseAction::ShiftExtra => continue, + ParseAction::Reduce { + child_count: 1, + alias_sequence_id: 0, + symbol, + .. + } => { + if !self.simple_aliases.contains_key(&symbol) + && !aliased_symbols.contains(&symbol) + && self.syntax_grammar.variables[symbol.index].kind + != VariableType::Named + && (unit_reduction_symbol.is_none() + || unit_reduction_symbol == Some(symbol)) + { + unit_reduction_symbol = Some(symbol); + continue; + } + } + _ => {} + } + only_unit_reductions = false; + break; + } + + if !only_unit_reductions { + break; + } + } + + if let Some(symbol) = unit_reduction_symbol { + if only_unit_reductions { + unit_reduction_symbols_by_state.insert(i, *symbol); + } + } + } + + for state in self.parse_table.states.iter_mut() { + let mut done = false; + while !done { + done = true; + state.update_referenced_states(|other_state_id, state| { + if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { + done = false; + state.nonterminal_entries[symbol] + } else { + other_state_id + } + }) + } + } } - let mut unit_reduction_symbols_by_state = HashMap::new(); - for (i, state) in parse_table.states.iter().enumerate() { - let mut only_unit_reductions = true; - let mut unit_reduction_symbol = None; - for (_, entry) in &state.terminal_entries { - for action in &entry.actions { - match action { - ParseAction::ShiftExtra => continue, - ParseAction::Reduce { - child_count: 1, - alias_sequence_id: 0, - symbol, - .. 
- } => { - if !simple_aliases.contains_key(&symbol) - && !aliased_symbols.contains(&symbol) - && syntax_grammar.variables[symbol.index].kind != VariableType::Named - && (unit_reduction_symbol.is_none() - || unit_reduction_symbol == Some(symbol)) - { - unit_reduction_symbol = Some(symbol); + fn merge_compatible_states(&mut self) { + let mut state_ids_by_signature = HashMap::new(); + for (i, state) in self.parse_table.states.iter().enumerate() { + state_ids_by_signature + .entry(state.unfinished_item_signature) + .or_insert(Vec::new()) + .push(i); + } + + let mut deleted_states = HashSet::new(); + loop { + let mut state_replacements = HashMap::new(); + for (_, state_ids) in &state_ids_by_signature { + for i in state_ids { + for j in state_ids { + if j == i { + break; + } + if deleted_states.contains(j) || deleted_states.contains(i) { continue; } + if self.merge_parse_state(*j, *i) { + deleted_states.insert(*i); + state_replacements.insert(*i, *j); + } } - _ => {} } - only_unit_reductions = false; + } + + if state_replacements.is_empty() { break; } - if !only_unit_reductions { - break; - } - } - - if let Some(symbol) = unit_reduction_symbol { - if only_unit_reductions { - unit_reduction_symbols_by_state.insert(i, *symbol); + for state in self.parse_table.states.iter_mut() { + state.update_referenced_states(|other_state_id, _| { + *state_replacements + .get(&other_state_id) + .unwrap_or(&other_state_id) + }); } } } - for state in parse_table.states.iter_mut() { - let mut done = false; - while !done { - done = true; - state.update_referenced_states(|other_state_id, state| { - if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { - done = false; - state.nonterminal_entries[symbol] - } else { - other_state_id - } - }) - } - } -} + fn merge_parse_state(&mut self, left: usize, right: usize) -> bool { + let left_state = &self.parse_table.states[left]; + let right_state = &self.parse_table.states[right]; -fn merge_compatible_states( - parse_table: &mut 
ParseTable, - syntax_grammar: &SyntaxGrammar, - token_conflict_map: &TokenConflictMap, -) { - let mut state_ids_by_signature = HashMap::new(); - for (i, state) in parse_table.states.iter().enumerate() { - state_ids_by_signature - .entry(state.unfinished_item_signature) - .or_insert(Vec::new()) - .push(i); - } - - let mut deleted_states = HashSet::new(); - loop { - let mut state_replacements = HashMap::new(); - for (_, state_ids) in &state_ids_by_signature { - for i in state_ids { - for j in state_ids { - if j == i { - break; - } - if deleted_states.contains(j) || deleted_states.contains(i) { - continue; - } - if merge_parse_state(syntax_grammar, token_conflict_map, parse_table, *j, *i) { - deleted_states.insert(*i); - state_replacements.insert(*i, *j); - } - } - } - } - - if state_replacements.is_empty() { - break; - } - - for state in parse_table.states.iter_mut() { - state.update_referenced_states(|other_state_id, _| { - *state_replacements - .get(&other_state_id) - .unwrap_or(&other_state_id) - }); - } - } -} - -fn merge_parse_state( - syntax_grammar: &SyntaxGrammar, - token_conflict_map: &TokenConflictMap, - parse_table: &mut ParseTable, - left: usize, - right: usize, -) -> bool { - let left_state = &parse_table.states[left]; - let right_state = &parse_table.states[right]; - - if left_state.nonterminal_entries != right_state.nonterminal_entries { - return false; - } - - for (symbol, left_entry) in &left_state.terminal_entries { - if let Some(right_entry) = right_state.terminal_entries.get(symbol) { - if right_entry.actions != left_entry.actions { - return false; - } - } else if !can_add_entry_to_state( - syntax_grammar, - token_conflict_map, - right_state, - *symbol, - left_entry, - ) { + if left_state.nonterminal_entries != right_state.nonterminal_entries { return false; } - } - let mut symbols_to_add = Vec::new(); - for (symbol, right_entry) in &right_state.terminal_entries { - if !left_state.terminal_entries.contains_key(&symbol) { - if 
!can_add_entry_to_state( - syntax_grammar, - token_conflict_map, - left_state, - *symbol, - right_entry, - ) { - return false; - } - symbols_to_add.push(*symbol); - } - } - - for symbol in symbols_to_add { - let entry = parse_table.states[right].terminal_entries[&symbol].clone(); - parse_table.states[left] - .terminal_entries - .insert(symbol, entry); - } - - true -} - -fn can_add_entry_to_state( - syntax_grammar: &SyntaxGrammar, - token_conflict_map: &TokenConflictMap, - state: &ParseState, - token: Symbol, - entry: &ParseTableEntry, -) -> bool { - // Do not add external tokens; they could conflict lexically with any of the state's - // existing lookahead tokens. - if token.is_external() { - return false; - } - - // Only merge parse states by allowing existing reductions to happen - // with additional lookahead tokens. Do not alter parse states in ways - // that allow entirely new types of actions to happen. - if state.terminal_entries.iter().all(|(_, e)| e != entry) { - return false; - } - match entry.actions.last() { - Some(ParseAction::Reduce { .. }) => {} - _ => return false, - } - - // Do not add tokens which are both internal and external. Their validity could - // influence the behavior of the external scanner. - if syntax_grammar - .external_tokens - .iter() - .any(|t| t.corresponding_internal_token == Some(token)) - { - return false; - } - - // Do not add a token if it conflicts with an existing token. 
- if token.is_terminal() { - for existing_token in state.terminal_entries.keys() { - if token_conflict_map.does_conflict(token.index, existing_token.index) { + for (symbol, left_entry) in &left_state.terminal_entries { + if let Some(right_entry) = right_state.terminal_entries.get(symbol) { + if right_entry.actions != left_entry.actions { + return false; + } + } else if !self.can_add_entry_to_state(right_state, *symbol, left_entry) { return false; } } + + let mut symbols_to_add = Vec::new(); + for (symbol, right_entry) in &right_state.terminal_entries { + if !left_state.terminal_entries.contains_key(&symbol) { + if !self.can_add_entry_to_state(left_state, *symbol, right_entry) { + return false; + } + symbols_to_add.push(*symbol); + } + } + + for symbol in symbols_to_add { + let entry = self.parse_table.states[right].terminal_entries[&symbol].clone(); + self.parse_table.states[left] + .terminal_entries + .insert(symbol, entry); + } + + true } - true -} - -fn remove_unused_states(parse_table: &mut ParseTable) { - let mut state_usage_map = vec![false; parse_table.states.len()]; - - state_usage_map[0] = true; - state_usage_map[1] = true; - - for state in &parse_table.states { - for referenced_state in state.referenced_states() { - state_usage_map[referenced_state] = true; + fn can_add_entry_to_state( + &self, + state: &ParseState, + token: Symbol, + entry: &ParseTableEntry, + ) -> bool { + // Do not add external tokens; they could conflict lexically with any of the state's + // existing lookahead tokens. + if token.is_external() { + return false; } + + // Only merge_compatible_states parse states by allowing existing reductions to happen + // with additional lookahead tokens. Do not alter parse states in ways + // that allow entirely new types of actions to happen. + if state.terminal_entries.iter().all(|(_, e)| e != entry) { + return false; + } + match entry.actions.last() { + Some(ParseAction::Reduce { .. 
}) => {} + _ => return false, + } + + // Do not add tokens which are both internal and external. Their validity could + // influence the behavior of the external scanner. + if self + .syntax_grammar + .external_tokens + .iter() + .any(|t| t.corresponding_internal_token == Some(token)) + { + return false; + } + + let is_word_token = self.syntax_grammar.word_token == Some(token); + let is_keyword = self.keywords.contains(&token); + + // Do not add a token if it conflicts with an existing token. + if token.is_terminal() { + for existing_token in state.terminal_entries.keys() { + if (is_word_token && self.keywords.contains(existing_token)) + || is_keyword && self.syntax_grammar.word_token.as_ref() == Some(existing_token) + { + continue; + } + if self + .token_conflict_map + .does_conflict(token.index, existing_token.index) + || self + .token_conflict_map + .does_match_same_string(token.index, existing_token.index) + { + return false; + } + } + } + + true } - let mut removed_predecessor_count = 0; - let mut state_replacement_map = vec![0; parse_table.states.len()]; - for state_id in 0..parse_table.states.len() { - state_replacement_map[state_id] = state_id - removed_predecessor_count; - if !state_usage_map[state_id] { - removed_predecessor_count += 1; + + fn remove_unused_states(&mut self) { + let mut state_usage_map = vec![false; self.parse_table.states.len()]; + + state_usage_map[0] = true; + state_usage_map[1] = true; + + for state in &self.parse_table.states { + for referenced_state in state.referenced_states() { + state_usage_map[referenced_state] = true; + } } - } - let mut state_id = 0; - let mut original_state_id = 0; - while state_id < parse_table.states.len() { - if state_usage_map[original_state_id] { - parse_table.states[state_id].update_referenced_states(|other_state_id, _| { - state_replacement_map[other_state_id] - }); - state_id += 1; - } else { - parse_table.states.remove(state_id); + let mut removed_predecessor_count = 0; + let mut 
state_replacement_map = vec![0; self.parse_table.states.len()]; + for state_id in 0..self.parse_table.states.len() { + state_replacement_map[state_id] = state_id - removed_predecessor_count; + if !state_usage_map[state_id] { + removed_predecessor_count += 1; + } + } + let mut state_id = 0; + let mut original_state_id = 0; + while state_id < self.parse_table.states.len() { + if state_usage_map[original_state_id] { + self.parse_table.states[state_id].update_referenced_states(|other_state_id, _| { + state_replacement_map[other_state_id] + }); + state_id += 1; + } else { + self.parse_table.states.remove(state_id); + } + original_state_id += 1; } - original_state_id += 1; } } From 5d3d161c057f112baed490bb767f16cfecde9948 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 14:08:24 -0800 Subject: [PATCH 089/208] Respect simple aliases in code gen --- src/render/mod.rs | 58 +++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/src/render/mod.rs b/src/render/mod.rs index dd046c93..0c0e6e59 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -233,12 +233,13 @@ impl Generator { indent!(self); for symbol in self.parse_table.symbols.iter() { if *symbol != Symbol::end() { - add_line!( - self, - "[{}] = \"{}\",", - self.symbol_ids[&symbol], - self.sanitize_string(self.metadata_for_symbol(*symbol).0) + let name = self.sanitize_string( + self.simple_aliases + .get(symbol) + .map(|alias| alias.value.as_str()) + .unwrap_or(self.metadata_for_symbol(*symbol).0), ); + add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); } } for (alias, symbol) in &self.alias_map { @@ -265,22 +266,27 @@ impl Generator { for symbol in &self.parse_table.symbols { add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); indent!(self); - match self.metadata_for_symbol(*symbol).1 { - VariableType::Named => { - add_line!(self, ".visible = true,"); - add_line!(self, ".named = true,"); - } - VariableType::Anonymous => { - 
add_line!(self, ".visible = true,"); - add_line!(self, ".named = false,"); - } - VariableType::Hidden => { - add_line!(self, ".visible = false,"); - add_line!(self, ".named = true,"); - } - VariableType::Auxiliary => { - add_line!(self, ".visible = false,"); - add_line!(self, ".named = false,"); + if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) { + add_line!(self, ".visible = true,"); + add_line!(self, ".named = {},", is_named); + } else { + match self.metadata_for_symbol(*symbol).1 { + VariableType::Named => { + add_line!(self, ".visible = true,"); + add_line!(self, ".named = true,"); + } + VariableType::Anonymous => { + add_line!(self, ".visible = true,"); + add_line!(self, ".named = false,"); + } + VariableType::Hidden => { + add_line!(self, ".visible = false,"); + add_line!(self, ".named = true,"); + } + VariableType::Auxiliary => { + add_line!(self, ".visible = false,"); + add_line!(self, ".named = false,"); + } } } dedent!(self); @@ -356,11 +362,7 @@ impl Generator { fn add_lex_state(&mut self, state: LexState) { if let Some(accept_action) = state.accept_action { - add_line!( - self, - "ACCEPT_TOKEN({});", - self.symbol_ids[&accept_action] - ); + add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } let mut ruled_out_characters = HashSet::new(); @@ -397,7 +399,9 @@ impl Generator { self.add_character_range_conditions(ranges, false) } CharacterSet::Exclude(chars) => { - let ranges = Self::get_ranges(chars, ruled_out_characters); + let ranges = Some('\0'..'\0') + .into_iter() + .chain(Self::get_ranges(chars, ruled_out_characters)); self.add_character_range_conditions(ranges, true) } } From bf9556dadc470dd2c543f9aab94070cc801e3d96 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Jan 2019 16:35:16 -0800 Subject: [PATCH 090/208] Fix recursive processing of rule inlining --- src/build_tables/build_lex_table.rs | 10 +- src/build_tables/build_parse_table.rs | 40 ++-- src/build_tables/item.rs | 48 ++-- 
src/build_tables/item_set_builder.rs | 30 ++- src/prepare_grammar/process_inlines.rs | 311 ++++++++++++------------- 5 files changed, 230 insertions(+), 209 deletions(-) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 6cd9a1ce..60810f83 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -23,7 +23,7 @@ pub(crate) fn build_lex_table( } let mut builder = LexTableBuilder::new(lexical_grammar); - for (i, state) in parse_table.states.iter_mut().enumerate() { + for state in parse_table.states.iter_mut() { let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| { if token.is_terminal() { if keywords.contains(&token) { @@ -37,7 +37,6 @@ pub(crate) fn build_lex_table( None } })); - info!("populate lex state for parse state {}", i); state.lex_state_id = builder.add_state_for_tokens(&tokens); } @@ -199,16 +198,17 @@ fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { continue; } for (j, state_j) in table.states.iter().enumerate() { - if state_replacements.contains_key(&j) { - continue; - } if j == i { break; } + if state_replacements.contains_key(&j) { + continue; + } if state_i == state_j { info!("replace state {} with state {}", i, j); state_replacements.insert(i, j); done = false; + break; } } } diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index ada34dff..6f930463 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -63,7 +63,28 @@ impl<'a> ParseTableBuilder<'a> { ), ); - self.process_part_state_queue()?; + while let Some(entry) = self.parse_state_queue.pop_front() { + // info!( + // "state: {}, item set: {}", + // entry.state_id, + // ParseItemSetDisplay( + // &self.item_sets_by_state_id[entry.state_id], + // self.syntax_grammar, + // self.lexical_grammar, + // ) + // ); + + let item_set = self + .item_set_builder + 
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]); + self.add_actions( + entry.preceding_symbols, + entry.preceding_auxiliary_symbols, + entry.state_id, + item_set, + )?; + } + self.populate_used_symbols(); self.remove_precedences(); @@ -116,27 +137,12 @@ impl<'a> ParseTableBuilder<'a> { } } - fn process_part_state_queue(&mut self) -> Result<()> { - while let Some(entry) = self.parse_state_queue.pop_front() { - let item_set = self - .item_set_builder - .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); - self.add_actions( - entry.preceding_symbols, - entry.preceding_auxiliary_symbols, - item_set, - entry.state_id, - )?; - } - Ok(()) - } - fn add_actions( &mut self, mut preceding_symbols: SymbolSequence, mut preceding_auxiliary_symbols: Vec, - item_set: ParseItemSet<'a>, state_id: ParseStateId, + item_set: ParseItemSet<'a>, ) -> Result<()> { let mut terminal_successors = HashMap::new(); let mut non_terminal_successors = HashMap::new(); diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index 511d7bef..d1d0cbbf 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -42,12 +42,19 @@ pub(crate) struct ParseItemSet<'a> { pub entries: BTreeMap, LookaheadSet>, } -pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem<'a>, &'a SyntaxGrammar, &'a LexicalGrammar); +pub(crate) struct ParseItemDisplay<'a>( + pub &'a ParseItem<'a>, + pub &'a SyntaxGrammar, + pub &'a LexicalGrammar +); + pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar); + +#[allow(dead_code)] pub(crate) struct ParseItemSetDisplay<'a>( - &'a ParseItemSet<'a>, - &'a SyntaxGrammar, - &'a LexicalGrammar, + pub &'a ParseItemSet<'a>, + pub &'a SyntaxGrammar, + pub &'a LexicalGrammar, ); impl LookaheadSet { @@ -144,14 +151,6 @@ impl LookaheadSet { } result } - - pub fn display_with<'a>( - &'a self, - syntax_grammar: &'a SyntaxGrammar, - lexical_grammar: &'a LexicalGrammar, - ) -> LookaheadSetDisplay<'a> { - 
LookaheadSetDisplay(self, syntax_grammar, lexical_grammar) - } } impl<'a> ParseItem<'a> { @@ -202,14 +201,6 @@ impl<'a> ParseItem<'a> { step_index: self.step_index + 1, } } - - pub fn display_with( - &'a self, - syntax_grammar: &'a SyntaxGrammar, - lexical_grammar: &'a LexicalGrammar, - ) -> ParseItemDisplay<'a> { - ParseItemDisplay(self, syntax_grammar, lexical_grammar) - } } impl<'a> ParseItemSet<'a> { @@ -235,14 +226,6 @@ impl<'a> ParseItemSet<'a> { } } } - - pub fn display_with( - &'a self, - syntax_grammar: &'a SyntaxGrammar, - lexical_grammar: &'a LexicalGrammar, - ) -> ParseItemSetDisplay<'a> { - ParseItemSetDisplay(self, syntax_grammar, lexical_grammar) - } } impl<'a> Default for ParseItemSet<'a> { @@ -253,6 +236,7 @@ impl<'a> Default for ParseItemSet<'a> { } } +#[allow(dead_code)] impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { if self.0.is_augmented() { @@ -282,6 +266,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { } else { write!(f, "{}", &self.1.variables[step.symbol.index].name)?; } + + if let Some(alias) = &step.alias { + write!(f, " (alias {})", alias.value)?; + } } if self.0.is_done() { @@ -323,8 +311,8 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> { writeln!( f, "{}\t{}", - item.display_with(self.1, self.2), - lookaheads.display_with(self.1, self.2) + ParseItemDisplay(item, self.1, self.2), + LookaheadSetDisplay(lookaheads, self.1, self.2) )?; } Ok(()) diff --git a/src/build_tables/item_set_builder.rs b/src/build_tables/item_set_builder.rs index 5714e7e2..939d700c 100644 --- a/src/build_tables/item_set_builder.rs +++ b/src/build_tables/item_set_builder.rs @@ -1,7 +1,8 @@ -use super::item::{LookaheadSet, ParseItem, ParseItemSet}; +use super::item::{LookaheadSet, ParseItem, ParseItemDisplay, ParseItemSet}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::rules::Symbol; use hashbrown::{HashMap, HashSet}; +use std::fmt; #[derive(Clone, 
Debug, PartialEq, Eq)] struct TransitiveClosureAddition<'a> { @@ -16,6 +17,8 @@ struct FollowSetInfo { } pub(crate) struct ParseItemSetBuilder<'a> { + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, first_sets: HashMap, last_sets: HashMap, inlines: &'a InlinedProductionMap, @@ -35,6 +38,8 @@ impl<'a> ParseItemSetBuilder<'a> { inlines: &'a InlinedProductionMap, ) -> Self { let mut result = Self { + syntax_grammar, + lexical_grammar, first_sets: HashMap::new(), last_sets: HashMap::new(), inlines, @@ -300,3 +305,26 @@ impl<'a> ParseItemSetBuilder<'a> { set.entries.insert(item, lookaheads.clone()); } } + +impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ParseItemSetBuilder {{\n")?; + + write!(f, " additions: {{\n")?; + for (i, variable) in self.syntax_grammar.variables.iter().enumerate() { + write!(f, " {}: {{\n", variable.name)?; + for addition in &self.transitive_closure_additions[i] { + write!( + f, + " {}\n", + ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar) + )?; + } + write!(f, " }},\n")?; + } + write!(f, " }},")?; + + write!(f, "}}")?; + Ok(()) + } +} diff --git a/src/prepare_grammar/process_inlines.rs b/src/prepare_grammar/process_inlines.rs index 24bbc14d..9fd2f2c6 100644 --- a/src/prepare_grammar/process_inlines.rs +++ b/src/prepare_grammar/process_inlines.rs @@ -3,6 +3,9 @@ use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct ProductionStepId { + // A `None` value here means that the production itself was produced via inlining, + // and is stored in the the builder's `productions` vector, as opposed to being + // stored in one of the grammar's variables. 
variable_index: Option, production_index: usize, step_index: usize, @@ -13,169 +16,166 @@ struct InlinedProductionMapBuilder { productions: Vec, } -impl ProductionStepId { - pub fn successor(&self) -> Self { - Self { - variable_index: self.variable_index, - production_index: self.production_index, - step_index: self.step_index + 1, - } - } -} - -fn production_for_id<'a>( - map: &'a InlinedProductionMapBuilder, - id: ProductionStepId, - grammar: &'a SyntaxGrammar, -) -> &'a Production { - if let Some(variable_index) = id.variable_index { - &grammar.variables[variable_index].productions[id.production_index] - } else { - &map.productions[id.production_index] - } -} - -fn production_step_for_id<'a>( - map: &'a InlinedProductionMapBuilder, - id: ProductionStepId, - grammar: &'a SyntaxGrammar, -) -> Option<&'a ProductionStep> { - production_for_id(map, id, grammar).steps.get(id.step_index) -} - -fn inline<'a>( - map: &'a mut InlinedProductionMapBuilder, - step_id: ProductionStepId, - grammar: &'a SyntaxGrammar, -) -> &'a Vec { - let step = production_step_for_id(map, step_id, grammar).unwrap(); - let mut productions_to_add = grammar.variables[step.symbol.index].productions.clone(); - - let mut i = 0; - while i < productions_to_add.len() { - if let Some(first_symbol) = productions_to_add[i].first_symbol() { - if grammar.variables_to_inline.contains(&first_symbol) { - // Remove the production from the vector, replacing it with a placeholder. - let production = productions_to_add - .splice(i..i + 1, [Production::default()].iter().cloned()) - .next() - .unwrap(); - - // Replace the placeholder with the inlined productions. 
- productions_to_add.splice( - i..i + 1, - grammar.variables[first_symbol.index] - .productions - .iter() - .map(|p| { - let mut p = p.clone(); - p.steps.extend(production.steps[1..].iter().cloned()); - p - }), - ); - continue; - } - } - i += 1; - } - - let result = productions_to_add - .into_iter() - .map(|production_to_add| { - let mut inlined_production = production_for_id(&map, step_id, grammar).clone(); - let removed_step = inlined_production - .steps - .splice( - step_id.step_index..step_id.step_index + 1, - production_to_add.steps.iter().cloned(), - ) - .next() - .unwrap(); - let inserted_steps = &mut inlined_production.steps - [step_id.step_index..step_id.step_index + production_to_add.steps.len()]; - if let Some(alias) = removed_step.alias { - for inserted_step in inserted_steps.iter_mut() { - inserted_step.alias = Some(alias.clone()); - } - } - if let Some(last_inserted_step) = inserted_steps.last_mut() { - last_inserted_step.precedence = removed_step.precedence; - last_inserted_step.associativity = removed_step.associativity; - } - map.productions - .iter() - .position(|p| *p == inlined_production) - .unwrap_or({ - map.productions.push(inlined_production); - map.productions.len() - 1 - }) - }) - .collect(); - - map.production_indices_by_step_id - .entry(step_id) - .or_insert(result) -} - -pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { - let mut result = InlinedProductionMapBuilder { - productions: Vec::new(), - production_indices_by_step_id: HashMap::new(), - }; - - let mut step_ids_to_process = Vec::new(); - for (variable_index, variable) in grammar.variables.iter().enumerate() { - for production_index in 0..variable.productions.len() { - step_ids_to_process.push(ProductionStepId { - variable_index: Some(variable_index), - production_index, - step_index: 0, - }); - while !step_ids_to_process.is_empty() { - let mut i = 0; - while i < step_ids_to_process.len() { - let step_id = step_ids_to_process[i]; - if let Some(step) 
= production_step_for_id(&result, step_id, grammar) { - if grammar.variables_to_inline.contains(&step.symbol) { - let inlined_step_ids = inline(&mut result, step_id, grammar) - .into_iter() - .cloned() - .map(|production_index| ProductionStepId { - variable_index: None, - production_index, - step_index: step_id.step_index, - }) - .collect::>(); - step_ids_to_process.splice(i..i + 1, inlined_step_ids); +impl InlinedProductionMapBuilder { + fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap { + let mut step_ids_to_process = Vec::new(); + for (variable_index, variable) in grammar.variables.iter().enumerate() { + for production_index in 0..variable.productions.len() { + step_ids_to_process.push(ProductionStepId { + variable_index: Some(variable_index), + production_index, + step_index: 0, + }); + while !step_ids_to_process.is_empty() { + let mut i = 0; + while i < step_ids_to_process.len() { + let step_id = step_ids_to_process[i]; + if let Some(step) = self.production_step_for_id(step_id, grammar) { + if grammar.variables_to_inline.contains(&step.symbol) { + let inlined_step_ids = self + .inline_production_at_step(step_id, grammar) + .into_iter() + .cloned() + .map(|production_index| ProductionStepId { + variable_index: None, + production_index, + step_index: step_id.step_index, + }); + step_ids_to_process.splice(i..i + 1, inlined_step_ids); + } else { + step_ids_to_process[i] = ProductionStepId { + variable_index: step_id.variable_index, + production_index: step_id.production_index, + step_index: step_id.step_index + 1, + }; + i += 1; + } } else { - step_ids_to_process[i] = step_id.successor(); - i += 1; + step_ids_to_process.remove(i); } - } else { - step_ids_to_process.remove(i); } } } } + + let productions = self.productions; + let production_indices_by_step_id = self.production_indices_by_step_id; + let production_map = production_indices_by_step_id + .into_iter() + .map(|(step_id, production_indices)| { + let production = if let 
Some(variable_index) = step_id.variable_index { + &grammar.variables[variable_index].productions[step_id.production_index] + } else { + &productions[step_id.production_index] + } as *const Production; + ((production, step_id.step_index as u32), production_indices) + }) + .collect(); + + InlinedProductionMap { + productions, + production_map, + } } - // result - let productions = result.productions; - let production_indices_by_step_id = result.production_indices_by_step_id; + fn inline_production_at_step<'a>( + &'a mut self, + step_id: ProductionStepId, + grammar: &'a SyntaxGrammar, + ) -> &'a Vec { + // Build a list of productions produced by inlining rules. + let mut i = 0; + let step_index = step_id.step_index; + let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()]; + while i < productions_to_add.len() { + if let Some(step) = productions_to_add[i].steps.get(step_index) { + let symbol = step.symbol.clone(); - let production_map = production_indices_by_step_id - .into_iter() - .map(|(step_id, production_indices)| { - let production = if let Some(variable_index) = step_id.variable_index { - &grammar.variables[variable_index].productions[step_id.production_index] - } else { - &productions[step_id.production_index] - } as *const Production; - ((production, step_id.step_index as u32), production_indices) - }) - .collect(); + if grammar.variables_to_inline.contains(&symbol) { + // Remove the production from the vector, replacing it with a placeholder. + let production = productions_to_add + .splice(i..i + 1, [Production::default()].iter().cloned()) + .next() + .unwrap(); - InlinedProductionMap { productions, production_map } + // Replace the placeholder with the inlined productions. 
+ productions_to_add.splice( + i..i + 1, + grammar.variables[symbol.index].productions.iter().map(|p| { + let mut production = production.clone(); + let removed_step = production + .steps + .splice(step_index..(step_index + 1), p.steps.iter().cloned()) + .next() + .unwrap(); + let inserted_steps = + &mut production.steps[step_index..(step_index + p.steps.len())]; + if let Some(alias) = removed_step.alias { + for inserted_step in inserted_steps.iter_mut() { + inserted_step.alias = Some(alias.clone()); + } + } + if let Some(last_inserted_step) = inserted_steps.last_mut() { + last_inserted_step.precedence = removed_step.precedence; + last_inserted_step.associativity = removed_step.associativity; + } + production + }), + ); + + continue; + } + } + i += 1; + } + + // Store all the computed productions. + let result = productions_to_add + .into_iter() + .map(|production| { + self.productions + .iter() + .position(|p| *p == production) + .unwrap_or({ + self.productions.push(production); + self.productions.len() - 1 + }) + }) + .collect(); + + // Cache these productions based on the original production step. 
+ self.production_indices_by_step_id + .entry(step_id) + .or_insert(result) + } + + fn production_for_id<'a>( + &'a self, + id: ProductionStepId, + grammar: &'a SyntaxGrammar, + ) -> &'a Production { + if let Some(variable_index) = id.variable_index { + &grammar.variables[variable_index].productions[id.production_index] + } else { + &self.productions[id.production_index] + } + } + + fn production_step_for_id<'a>( + &'a self, + id: ProductionStepId, + grammar: &'a SyntaxGrammar, + ) -> Option<&'a ProductionStep> { + self.production_for_id(id, grammar).steps.get(id.step_index) + } +} + +pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { + InlinedProductionMapBuilder { + productions: Vec::new(), + production_indices_by_step_id: HashMap::new(), + } + .build(grammar) } #[cfg(test)] @@ -234,7 +234,7 @@ mod tests { // Inlining variable 1 yields two productions. assert_eq!( inline_map - .inlined_productions(&grammar.variables[0].productions[0], 1) + .inlined_productions(&grammar.variables[0].productions[0], 1) .unwrap() .cloned() .collect::>(), @@ -446,8 +446,7 @@ mod tests { ProductionStep::new(Symbol::terminal(12)) .with_prec(1, Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), - ProductionStep::new(Symbol::non_terminal(2)) - .with_alias("outer_alias", true), + ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true), ] }], ); From 70aa4c2b2d97fbcf6e330f85e4d4fd0df026cfce Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 09:11:44 -0800 Subject: [PATCH 091/208] Add a --no-minimize flag to suppress table minimization for debugging --- src/build_tables/build_lex_table.rs | 22 ++++++++++++++----- src/build_tables/build_parse_table.rs | 13 ++++++++++- ...parse_table.rs => minimize_parse_table.rs} | 14 ++++++------ src/build_tables/mod.rs | 22 +++++++++++++------ src/generate.rs | 5 +++-- src/main.rs | 6 +++-- 6 files changed, 58 insertions(+), 24 deletions(-) rename 
src/build_tables/{shrink_parse_table.rs => minimize_parse_table.rs} (97%) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 60810f83..9c440f4e 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -12,6 +12,7 @@ pub(crate) fn build_lex_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, keywords: &LookaheadSet, + minimize: bool, ) -> (LexTable, LexTable) { let keyword_lex_table; if syntax_grammar.word_token.is_some() { @@ -41,7 +42,10 @@ pub(crate) fn build_lex_table( } let mut table = builder.table; - shrink_lex_table(&mut table, parse_table); + + if minimize { + minimize_lex_table(&mut table, parse_table); + } (table, keyword_lex_table) } @@ -147,14 +151,20 @@ impl<'a> LexTableBuilder<'a> { completion = Some((id, prec)); } + info!( + "lex state: {}, completion: {:?}", + state_id, + completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec)) + ); + let successors = self.cursor.grouped_successors(); - info!("populate state: {}, successors: {:?}", state_id, successors); + info!("lex state: {}, successors: {:?}", state_id, successors); // If EOF is a valid lookahead token, add a transition predicated on the null // character that leads to the empty set of NFA states. 
if eof_valid { let (next_state_id, _) = self.add_state(Vec::new(), false); - info!("populate state: {}, character: EOF", state_id); + info!("lex state: {}, successor: EOF", state_id); self.table.states[state_id].advance_actions.push(( CharacterSet::empty().add_char('\0'), AdvanceAction { @@ -166,7 +176,9 @@ impl<'a> LexTableBuilder<'a> { for (chars, advance_precedence, next_states, is_sep) in successors { if let Some((_, completed_precedence)) = completion { - if advance_precedence < completed_precedence { + if advance_precedence < completed_precedence + || (advance_precedence == completed_precedence && is_sep) + { continue; } } @@ -188,7 +200,7 @@ impl<'a> LexTableBuilder<'a> { } } -fn shrink_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { +fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let mut state_replacements = BTreeMap::new(); let mut done = false; while !done { diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 6f930463..9bccf238 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -67,7 +67,7 @@ impl<'a> ParseTableBuilder<'a> { // info!( // "state: {}, item set: {}", // entry.state_id, - // ParseItemSetDisplay( + // super::item::ParseItemSetDisplay( // &self.item_sets_by_state_id[entry.state_id], // self.syntax_grammar, // self.lexical_grammar, @@ -77,6 +77,17 @@ impl<'a> ParseTableBuilder<'a> { let item_set = self .item_set_builder .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); + + // info!( + // "state: {}, closed item set: {}", + // entry.state_id, + // super::item::ParseItemSetDisplay( + // &item_set, + // self.syntax_grammar, + // self.lexical_grammar, + // ) + // ); + self.add_actions( entry.preceding_symbols, entry.preceding_auxiliary_symbols, diff --git a/src/build_tables/shrink_parse_table.rs b/src/build_tables/minimize_parse_table.rs similarity index 97% rename from 
src/build_tables/shrink_parse_table.rs rename to src/build_tables/minimize_parse_table.rs index 64a4b259..573bf974 100644 --- a/src/build_tables/shrink_parse_table.rs +++ b/src/build_tables/minimize_parse_table.rs @@ -5,26 +5,26 @@ use crate::rules::{AliasMap, Symbol}; use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use hashbrown::{HashMap, HashSet}; -pub(crate) fn shrink_parse_table( +pub(crate) fn minimize_parse_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, simple_aliases: &AliasMap, token_conflict_map: &TokenConflictMap, keywords: &LookaheadSet, ) { - let mut optimizer = Optimizer { + let mut minimizer = Minimizer { parse_table, syntax_grammar, token_conflict_map, keywords, simple_aliases, }; - optimizer.remove_unit_reductions(); - optimizer.merge_compatible_states(); - optimizer.remove_unused_states(); + minimizer.remove_unit_reductions(); + minimizer.merge_compatible_states(); + minimizer.remove_unused_states(); } -struct Optimizer<'a> { +struct Minimizer<'a> { parse_table: &'a mut ParseTable, syntax_grammar: &'a SyntaxGrammar, token_conflict_map: &'a TokenConflictMap<'a>, @@ -32,7 +32,7 @@ struct Optimizer<'a> { simple_aliases: &'a AliasMap, } -impl<'a> Optimizer<'a> { +impl<'a> Minimizer<'a> { fn remove_unit_reductions(&mut self) { let mut aliased_symbols = HashSet::new(); for variable in &self.syntax_grammar.variables { diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 84659600..886594f8 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -3,14 +3,14 @@ mod build_parse_table; mod coincident_tokens; mod item; mod item_set_builder; -mod shrink_parse_table; +mod minimize_parse_table; mod token_conflicts; use self::build_lex_table::build_lex_table; use self::build_parse_table::build_parse_table; use self::coincident_tokens::CoincidentTokenIndex; use self::item::LookaheadSet; -use self::shrink_parse_table::shrink_parse_table; +use 
self::minimize_parse_table::minimize_parse_table; use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; @@ -23,6 +23,7 @@ pub(crate) fn build_tables( lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, inlines: &InlinedProductionMap, + minimize: bool, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { let (mut parse_table, following_tokens) = build_parse_table(syntax_grammar, lexical_grammar, inlines)?; @@ -42,15 +43,22 @@ pub(crate) fn build_tables( &coincident_token_index, &token_conflict_map, ); - shrink_parse_table( + if minimize { + minimize_parse_table( + &mut parse_table, + syntax_grammar, + simple_aliases, + &token_conflict_map, + &keywords, + ); + } + let (main_lex_table, keyword_lex_table) = build_lex_table( &mut parse_table, syntax_grammar, - simple_aliases, - &token_conflict_map, + lexical_grammar, &keywords, + minimize, ); - let (main_lex_table, keyword_lex_table) = - build_lex_table(&mut parse_table, syntax_grammar, lexical_grammar, &keywords); Ok(( parse_table, main_lex_table, diff --git a/src/generate.rs b/src/generate.rs index cdbbea4f..d574c165 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -4,14 +4,15 @@ use crate::prepare_grammar::prepare_grammar; use crate::build_tables::build_tables; use crate::render::render_c_code; -pub fn generate_parser_for_grammar(input: &str) -> Result { +pub fn generate_parser_for_grammar(input: &str, minimize: bool) -> Result { let input_grammar = parse_grammar(input)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, - &inlines + &inlines, + minimize )?; let c_code = render_c_code( &input_grammar.name, diff --git a/src/main.rs b/src/main.rs index a08922b7..10820ed1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,7 
+33,8 @@ fn main() -> error::Result<()> { .subcommand( SubCommand::with_name("generate") .about("Generate a parser") - .arg(Arg::with_name("log").long("log")), + .arg(Arg::with_name("log").long("log")) + .arg(Arg::with_name("no-minimize").long("no-minimize")), ) .subcommand( SubCommand::with_name("parse") @@ -54,10 +55,11 @@ fn main() -> error::Result<()> { logger::init(); } + let minimize = !matches.is_present("no-minimize"); let mut grammar_path = env::current_dir().expect("Failed to read CWD"); grammar_path.push("grammar.js"); let grammar_json = load_js_grammar_file(grammar_path); - let code = generate::generate_parser_for_grammar(&grammar_json)?; + let code = generate::generate_parser_for_grammar(&grammar_json, minimize)?; println!("{}", code); } From cc0fbc0d9306a838d10a7b258a58fa7f76c55cc3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 09:12:05 -0800 Subject: [PATCH 092/208] Fix and simplify handling of precedence for completion of tokens --- src/prepare_grammar/expand_tokens.rs | 88 +++++++++++----------------- 1 file changed, 33 insertions(+), 55 deletions(-) diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 6520c432..01b925f9 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -28,6 +28,13 @@ fn get_implicit_precedence(rule: &Rule) -> i32 { } } +fn get_completion_precedence(rule: &Rule) -> i32 { + match rule { + Rule::Metadata { params, .. } => params.precedence.unwrap_or(0), + _ => 0, + } +} + pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { let mut builder = NfaBuilder { nfa: Nfa::new(), @@ -52,7 +59,7 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result) { - let mut i = 0; - while i < state_ids.len() { - let state_id = state_ids[i]; - let (left, right) = match &mut self.nfa.states[state_id as usize] { - NfaState::Accept { precedence, .. 
} => { - *precedence = prec; - return; - } - NfaState::Split(left, right) => (*left, *right), - _ => return, - }; - if !state_ids.contains(&left) { - state_ids.push(left); - } - if !state_ids.contains(&right) { - state_ids.push(right); - } - i += 1; - } - } } #[cfg(test)] @@ -551,17 +535,21 @@ mod tests { ("aeeeef", Some((2, "aeeee"))), ], }, + // immediate tokens with higher precedence Row { rules: vec![ - Rule::seq(vec![ - Rule::string("a"), - Rule::choice(vec![ - Rule::string("b"), - Rule::string("c"), - ]), - Rule::string("d"), - ]) + Rule::prec(1, Rule::pattern("[^a]+")), + Rule::immediate_token(Rule::prec(2, Rule::pattern("[^ab]+"))), ], + separators: vec![Rule::pattern("\\s")], + examples: vec![("cccb", Some((1, "ccc")))], + }, + Row { + rules: vec![Rule::seq(vec![ + Rule::string("a"), + Rule::choice(vec![Rule::string("b"), Rule::string("c")]), + Rule::string("d"), + ])], separators: vec![], examples: vec![ ("abd", Some((0, "abd"))), @@ -570,34 +558,24 @@ mod tests { ("ad", None), ("d", None), ("a", None), - ] + ], }, // nested choices within sequences Row { - rules: vec![ - Rule::seq(vec![ - Rule::pattern("[0-9]+"), - Rule::choice(vec![ - Rule::Blank, + rules: vec![Rule::seq(vec![ + Rule::pattern("[0-9]+"), + Rule::choice(vec![ + Rule::Blank, + Rule::choice(vec![Rule::seq(vec![ + Rule::choice(vec![Rule::string("e"), Rule::string("E")]), Rule::choice(vec![ - Rule::seq(vec![ - Rule::choice(vec![ - Rule::string("e"), - Rule::string("E") - ]), - Rule::choice(vec![ - Rule::Blank, - Rule::choice(vec![ - Rule::string("+"), - Rule::string("-"), - ]) - ]), - Rule::pattern("[0-9]+"), - ]) - ]) - ]), + Rule::Blank, + Rule::choice(vec![Rule::string("+"), Rule::string("-")]), + ]), + Rule::pattern("[0-9]+"), + ])]), ]), - ], + ])], separators: vec![], examples: vec![ ("12", Some((0, "12"))), From d845b81ee961d37e8506a2b421d54b867bb7e3c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 09:42:06 -0800 Subject: [PATCH 093/208] Represent nfa transitions as 
structs with named fields, not tuples --- src/build_tables/build_lex_table.rs | 24 ++- src/build_tables/mod.rs | 2 +- src/build_tables/token_conflicts.rs | 26 +-- src/nfa.rs | 259 +++++++++++++++++---------- src/prepare_grammar/expand_tokens.rs | 16 +- 5 files changed, 211 insertions(+), 116 deletions(-) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 9c440f4e..4212d62b 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -1,7 +1,7 @@ use super::item::LookaheadSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::nfa::{CharacterSet, NfaCursor}; +use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use crate::rules::Symbol; use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable}; use std::collections::hash_map::Entry; @@ -157,8 +157,8 @@ impl<'a> LexTableBuilder<'a> { completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec)) ); - let successors = self.cursor.grouped_successors(); - info!("lex state: {}, successors: {:?}", state_id, successors); + let transitions = self.cursor.transitions(); + info!("lex state: {}, transitions: {:?}", state_id, transitions); // If EOF is a valid lookahead token, add a transition predicated on the null // character that leads to the empty set of NFA states. 
@@ -174,20 +174,26 @@ impl<'a> LexTableBuilder<'a> { )); } - for (chars, advance_precedence, next_states, is_sep) in successors { + for NfaTransition { + characters, + precedence, + states, + is_separator, + } in transitions + { if let Some((_, completed_precedence)) = completion { - if advance_precedence < completed_precedence - || (advance_precedence == completed_precedence && is_sep) + if precedence < completed_precedence + || (precedence == completed_precedence && is_separator) { continue; } } - let (next_state_id, _) = self.add_state(next_states, eof_valid && is_sep); + let (next_state_id, _) = self.add_state(states, eof_valid && is_separator); self.table.states[state_id].advance_actions.push(( - chars, + characters, AdvanceAction { state: next_state_id, - in_main_token: !is_sep, + in_main_token: !is_separator, }, )); } diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 886594f8..78798732 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -239,7 +239,7 @@ fn identify_keywords( } fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool { - cursor.successors().all(|(chars, _, _, is_sep)| { + cursor.transition_chars().all(|(chars, is_sep)| { if is_sep { true } else if let CharacterSet::Include(chars) = chars { diff --git a/src/build_tables/token_conflicts.rs b/src/build_tables/token_conflicts.rs index 91edadec..cb2b6efe 100644 --- a/src/build_tables/token_conflicts.rs +++ b/src/build_tables/token_conflicts.rs @@ -1,6 +1,6 @@ use crate::build_tables::item::LookaheadSet; use crate::grammars::LexicalGrammar; -use crate::nfa::{CharacterSet, NfaCursor}; +use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use hashbrown::HashSet; use std::cmp::Ordering; use std::fmt; @@ -131,7 +131,7 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec, } -impl Default for Nfa { - fn default() -> Self { - Self { states: Vec::new() } - } -} - #[derive(Debug)] pub struct NfaCursor<'a> { pub(crate) state_ids: Vec, 
nfa: &'a Nfa, } +#[derive(Debug, PartialEq, Eq)] +pub struct NfaTransition { + pub characters: CharacterSet, + pub is_separator: bool, + pub precedence: i32, + pub states: Vec, +} + +impl Default for Nfa { + fn default() -> Self { + Self { states: Vec::new() } + } +} + impl CharacterSet { pub fn empty() -> Self { CharacterSet::Include(Vec::new()) @@ -328,7 +336,15 @@ impl<'a> NfaCursor<'a> { self.state_ids = states } - pub fn successors(&self) -> impl Iterator { + pub fn transition_chars(&self) -> impl Iterator { + self.raw_transitions().map(|t| (t.0, t.1)) + } + + pub fn transitions(&self) -> Vec { + Self::group_transitions(self.raw_transitions()) + } + + fn raw_transitions(&self) -> impl Iterator { self.state_ids.iter().filter_map(move |id| { if let NfaState::Advance { chars, @@ -337,52 +353,53 @@ impl<'a> NfaCursor<'a> { is_sep, } = &self.nfa.states[*id as usize] { - Some((chars, *precedence, *state_id, *is_sep)) + Some((chars, *is_sep, *precedence, *state_id)) } else { None } }) } - pub fn grouped_successors(&self) -> Vec<(CharacterSet, i32, Vec, bool)> { - Self::group_successors(self.successors()) - } - - fn group_successors<'b>( - iter: impl Iterator, - ) -> Vec<(CharacterSet, i32, Vec, bool)> { - let mut result: Vec<(CharacterSet, i32, Vec, bool)> = Vec::new(); - for (chars, prec, state, is_sep) in iter { + fn group_transitions<'b>( + iter: impl Iterator, + ) -> Vec { + let mut result: Vec = Vec::new(); + for (chars, is_sep, prec, state) in iter { let mut chars = chars.clone(); let mut i = 0; while i < result.len() && !chars.is_empty() { - let intersection = result[i].0.remove_intersection(&mut chars); + let intersection = result[i].characters.remove_intersection(&mut chars); if !intersection.is_empty() { - let mut intersection_states = result[i].2.clone(); + let mut intersection_states = result[i].states.clone(); match intersection_states.binary_search(&state) { Err(j) => intersection_states.insert(j, state), _ => {} } - let intersection_entry = ( - 
intersection, - max(result[i].1, prec), - intersection_states, - result[i].3 || is_sep, - ); - if result[i].0.is_empty() { - result[i] = intersection_entry; + let intersection_transition = NfaTransition { + characters: intersection, + is_separator: result[i].is_separator || is_sep, + precedence: max(result[i].precedence, prec), + states: intersection_states, + }; + if result[i].characters.is_empty() { + result[i] = intersection_transition; } else { - result.insert(i, intersection_entry); + result.insert(i, intersection_transition); i += 1; } } i += 1; } if !chars.is_empty() { - result.push((chars, prec, vec![state], is_sep)); + result.push(NfaTransition { + characters: chars, + precedence: prec, + states: vec![state], + is_separator: is_sep, + }); } } - result.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters)); result } @@ -435,111 +452,173 @@ mod tests { use super::*; #[test] - fn test_group_successors() { + fn test_group_transitions() { let table = [ // overlapping character classes ( vec![ - (CharacterSet::empty().add_range('a', 'f'), 0, 1, false), - (CharacterSet::empty().add_range('d', 'i'), 1, 2, false), + (CharacterSet::empty().add_range('a', 'f'), false, 0, 1), + (CharacterSet::empty().add_range('d', 'i'), false, 1, 2), ], vec![ - (CharacterSet::empty().add_range('a', 'c'), 0, vec![1], false), - ( - CharacterSet::empty().add_range('d', 'f'), - 1, - vec![1, 2], - false, - ), - (CharacterSet::empty().add_range('g', 'i'), 1, vec![2], false), + NfaTransition { + characters: CharacterSet::empty().add_range('a', 'c'), + is_separator: false, + precedence: 0, + states: vec![1], + }, + NfaTransition { + characters: CharacterSet::empty().add_range('d', 'f'), + is_separator: false, + precedence: 1, + states: vec![1, 2], + }, + NfaTransition { + characters: CharacterSet::empty().add_range('g', 'i'), + is_separator: false, + precedence: 1, + states: vec![2], + }, ], ), // large character class followed by many 
individual characters ( vec![ - (CharacterSet::empty().add_range('a', 'z'), 0, 1, false), - (CharacterSet::empty().add_char('d'), 0, 2, false), - (CharacterSet::empty().add_char('i'), 0, 3, false), - (CharacterSet::empty().add_char('f'), 0, 4, false), + (CharacterSet::empty().add_range('a', 'z'), false, 0, 1), + (CharacterSet::empty().add_char('d'), false, 0, 2), + (CharacterSet::empty().add_char('i'), false, 0, 3), + (CharacterSet::empty().add_char('f'), false, 0, 4), ], vec![ - (CharacterSet::empty().add_char('d'), 0, vec![1, 2], false), - (CharacterSet::empty().add_char('f'), 0, vec![1, 4], false), - (CharacterSet::empty().add_char('i'), 0, vec![1, 3], false), - ( - CharacterSet::empty() + NfaTransition { + characters: CharacterSet::empty().add_char('d'), + is_separator: false, + precedence: 0, + states: vec![1, 2], + }, + NfaTransition { + characters: CharacterSet::empty().add_char('f'), + is_separator: false, + precedence: 0, + states: vec![1, 4], + }, + NfaTransition { + characters: CharacterSet::empty().add_char('i'), + is_separator: false, + precedence: 0, + states: vec![1, 3], + }, + NfaTransition { + characters: CharacterSet::empty() .add_range('a', 'c') .add_char('e') .add_range('g', 'h') .add_range('j', 'z'), - 0, - vec![1], - false, - ), + is_separator: false, + precedence: 0, + states: vec![1], + }, ], ), // negated character class followed by an individual character ( vec![ - (CharacterSet::empty().add_char('0'), 0, 1, false), - (CharacterSet::empty().add_char('b'), 0, 2, false), + (CharacterSet::empty().add_char('0'), false, 0, 1), + (CharacterSet::empty().add_char('b'), false, 0, 2), ( CharacterSet::empty().add_range('a', 'f').negate(), + false, 0, 3, - false, ), - (CharacterSet::empty().add_char('c'), 0, 4, false), + (CharacterSet::empty().add_char('c'), false, 0, 4), ], vec![ - (CharacterSet::empty().add_char('0'), 0, vec![1, 3], false), - (CharacterSet::empty().add_char('b'), 0, vec![2], false), - (CharacterSet::empty().add_char('c'), 0, 
vec![4], false), - ( - CharacterSet::empty() + NfaTransition { + characters: CharacterSet::empty().add_char('0'), + precedence: 0, + states: vec![1, 3], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::empty().add_char('b'), + precedence: 0, + states: vec![2], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::empty().add_char('c'), + precedence: 0, + states: vec![4], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::empty() .add_range('a', 'f') .add_char('0') .negate(), - 0, - vec![3], - false, - ), + precedence: 0, + states: vec![3], + is_separator: false, + }, ], ), // multiple negated character classes ( vec![ - (CharacterSet::Include(vec!['a']), 0, 1, false), - (CharacterSet::Exclude(vec!['a', 'b', 'c']), 0, 2, false), - (CharacterSet::Include(vec!['g']), 0, 6, false), - (CharacterSet::Exclude(vec!['d', 'e', 'f']), 0, 3, false), - (CharacterSet::Exclude(vec!['g', 'h', 'i']), 0, 4, false), - (CharacterSet::Include(vec!['g']), 0, 5, false), + (CharacterSet::Include(vec!['a']), false, 0, 1), + (CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2), + (CharacterSet::Include(vec!['g']), false, 0, 6), + (CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3), + (CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4), + (CharacterSet::Include(vec!['g']), false, 0, 5), ], vec![ - (CharacterSet::Include(vec!['a']), 0, vec![1, 3, 4], false), - (CharacterSet::Include(vec!['g']), 0, vec![2, 3, 5, 6], false), - (CharacterSet::Include(vec!['b', 'c']), 0, vec![3, 4], false), - (CharacterSet::Include(vec!['h', 'i']), 0, vec![2, 3], false), - ( - CharacterSet::Include(vec!['d', 'e', 'f']), - 0, - vec![2, 4], - false, - ), - ( - CharacterSet::Exclude(vec!['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']), - 0, - vec![2, 3, 4], - false, - ), + NfaTransition { + characters: CharacterSet::Include(vec!['a']), + precedence: 0, + states: vec![1, 3, 4], + is_separator: false, + }, + NfaTransition { + 
characters: CharacterSet::Include(vec!['g']), + precedence: 0, + states: vec![2, 3, 5, 6], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::Include(vec!['b', 'c']), + precedence: 0, + states: vec![3, 4], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::Include(vec!['h', 'i']), + precedence: 0, + states: vec![2, 3], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::Include(vec!['d', 'e', 'f']), + precedence: 0, + states: vec![2, 4], + is_separator: false, + }, + NfaTransition { + characters: CharacterSet::Exclude(vec![ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', + ]), + precedence: 0, + states: vec![2, 3, 4], + is_separator: false, + }, ], ), ]; for row in table.iter() { assert_eq!( - NfaCursor::group_successors(row.0.iter().map(|(c, p, s, sep)| (c, *p, *s, *sep))), + NfaCursor::group_transitions(row.0.iter().map(|(c, sep, p, s)| (c, *sep, *p, *s))), row.1 ); } diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 01b925f9..91a0e364 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -372,7 +372,7 @@ impl NfaBuilder { mod tests { use super::*; use crate::grammars::Variable; - use crate::nfa::NfaCursor; + use crate::nfa::{NfaCursor, NfaTransition}; fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> { let start_states = grammar.variables.iter().map(|v| v.start_state).collect(); @@ -389,14 +389,18 @@ mod tests { result_precedence = precedence; } } - if let Some((_, _, next_states, in_sep)) = cursor - .grouped_successors() + if let Some(NfaTransition { + states, + is_separator, + .. 
+ }) = cursor + .transitions() .into_iter() - .find(|(chars, prec, _, _)| chars.contains(c) && *prec >= result_precedence) + .find(|t| t.characters.contains(c) && t.precedence >= result_precedence) { - cursor.reset(next_states); + cursor.reset(states); end_char += 1; - if in_sep { + if is_separator { start_char = end_char; } } else { From 79b9d5ebed3470195e05b50d3f0b42b21cb7c69b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 11:19:53 -0800 Subject: [PATCH 094/208] Fix minor differences in generated C code --- src/build_tables/build_parse_table.rs | 12 ++++++------ src/render/mod.rs | 20 +++++++++----------- src/rules.rs | 2 +- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 9bccf238..5fc015af 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -531,7 +531,6 @@ impl<'a> ParseTableBuilder<'a> { } fn populate_used_symbols(&mut self) { - self.parse_table.symbols.push(Symbol::end()); let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()]; let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()]; let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()]; @@ -547,16 +546,17 @@ impl<'a> ParseTableBuilder<'a> { non_terminal_usages[symbol.index] = true; } } - for (i, value) in terminal_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::terminal(i)); - } - } for (i, value) in external_usages.into_iter().enumerate() { if value { self.parse_table.symbols.push(Symbol::external(i)); } } + self.parse_table.symbols.push(Symbol::end()); + for (i, value) in terminal_usages.into_iter().enumerate() { + if value { + self.parse_table.symbols.push(Symbol::terminal(i)); + } + } for (i, value) in non_terminal_usages.into_iter().enumerate() { if value { self.parse_table.symbols.push(Symbol::non_terminal(i)); diff --git 
a/src/render/mod.rs b/src/render/mod.rs index 0c0e6e59..61c167bb 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -232,15 +232,13 @@ impl Generator { add_line!(self, "static const char *ts_symbol_names[] = {{"); indent!(self); for symbol in self.parse_table.symbols.iter() { - if *symbol != Symbol::end() { - let name = self.sanitize_string( - self.simple_aliases - .get(symbol) - .map(|alias| alias.value.as_str()) - .unwrap_or(self.metadata_for_symbol(*symbol).0), - ); - add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); - } + let name = self.sanitize_string( + self.simple_aliases + .get(symbol) + .map(|alias| alias.value.as_str()) + .unwrap_or(self.metadata_for_symbol(*symbol).0), + ); + add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); } for (alias, symbol) in &self.alias_map { if symbol.is_none() { @@ -864,7 +862,7 @@ impl Generator { fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { match symbol.kind { - SymbolType::End => ("end", VariableType::Auxiliary), + SymbolType::End => ("end", VariableType::Hidden), SymbolType::NonTerminal => { let variable = &self.syntax_grammar.variables[symbol.index]; (&variable.name, variable.kind) @@ -950,7 +948,7 @@ impl Generator { fn add_character(&mut self, c: char) { if c.is_ascii() { match c { - '\0' => add!(self, "'\\0'"), + '\0' => add!(self, "0"), '\'' => add!(self, "'\\''"), '\\' => add!(self, "'\\\\'"), '\t' => add!(self, "'\\t'"), diff --git a/src/rules.rs b/src/rules.rs index bd0340fc..e15070ea 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -3,9 +3,9 @@ use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum SymbolType { External, + End, Terminal, NonTerminal, - End, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] From baf7f3603c5eca1c338be4665d516ff6d189a020 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 11:30:53 -0800 Subject: [PATCH 095/208] Mark fragile tokens 
--- src/build_tables/mod.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index 78798732..ed47665e 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -43,6 +43,11 @@ pub(crate) fn build_tables( &coincident_token_index, &token_conflict_map, ); + mark_fragile_tokens( + &mut parse_table, + lexical_grammar, + &token_conflict_map, + ); if minimize { minimize_parse_table( &mut parse_table, @@ -238,6 +243,34 @@ fn identify_keywords( keywords } +fn mark_fragile_tokens( + parse_table: &mut ParseTable, + lexical_grammar: &LexicalGrammar, + token_conflict_map: &TokenConflictMap, +) { + let n = lexical_grammar.variables.len(); + let mut valid_tokens_mask = Vec::with_capacity(n); + for state in parse_table.states.iter_mut() { + valid_tokens_mask.clear(); + valid_tokens_mask.resize(n, false); + for token in state.terminal_entries.keys() { + if token.is_terminal() { + valid_tokens_mask[token.index] = true; + } + } + for (token, entry) in state.terminal_entries.iter_mut() { + for i in 0..n { + if token_conflict_map.does_overlap(i, token.index) { + if valid_tokens_mask[i] { + entry.reusable = false; + break; + } + } + } + } + } +} + fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool { cursor.transition_chars().all(|(chars, is_sep)| { if is_sep { From d0c3e26e8409637f4752a4dafe20297fac4420bc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 11:52:52 -0800 Subject: [PATCH 096/208] Don't let lex state merging be fooled by trivial loops --- src/build_tables/build_lex_table.rs | 21 +++++++++++++-------- src/render/mod.rs | 14 +++++++------- src/tables.rs | 2 +- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/build_tables/build_lex_table.rs b/src/build_tables/build_lex_table.rs index 4212d62b..bcc1bf3d 100644 --- a/src/build_tables/build_lex_table.rs +++ b/src/build_tables/build_lex_table.rs @@ -168,7 +168,7 @@ impl<'a> 
LexTableBuilder<'a> { self.table.states[state_id].advance_actions.push(( CharacterSet::empty().add_char('\0'), AdvanceAction { - state: next_state_id, + state: Some(next_state_id), in_main_token: true, }, )); @@ -189,10 +189,15 @@ impl<'a> LexTableBuilder<'a> { } } let (next_state_id, _) = self.add_state(states, eof_valid && is_separator); + let next_state = if next_state_id == state_id { + None + } else { + Some(next_state_id) + }; self.table.states[state_id].advance_actions.push(( characters, AdvanceAction { - state: next_state_id, + state: next_state, in_main_token: !is_separator, }, )); @@ -231,10 +236,10 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { } } for state in table.states.iter_mut() { - for advance_action in state.advance_actions.iter_mut() { - if let Some(new_state_id) = state_replacements.get(&advance_action.1.state) { - advance_action.1.state = *new_state_id; - } + for (_, advance_action) in state.advance_actions.iter_mut() { + advance_action.state = advance_action + .state + .map(|s| state_replacements.get(&s).cloned().unwrap_or(s)) } } } @@ -259,8 +264,8 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { } for state in table.states.iter_mut() { - for advance_action in state.advance_actions.iter_mut() { - advance_action.1.state = final_state_replacements[advance_action.1.state]; + for (_, advance_action) in state.advance_actions.iter_mut() { + advance_action.state = advance_action.state.map(|s| final_state_replacements[s]); } } diff --git a/src/render/mod.rs b/src/render/mod.rs index 61c167bb..58235fd9 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -342,7 +342,7 @@ impl Generator { for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); - self.add_lex_state(state); + self.add_lex_state(i, state); dedent!(self); } @@ -358,7 +358,7 @@ impl Generator { add_line!(self, ""); } - fn add_lex_state(&mut self, state: LexState) { + fn 
add_lex_state(&mut self, index: usize, state: LexState) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } @@ -372,14 +372,14 @@ impl Generator { if self.add_character_set_condition(&characters, &ruled_out_characters) { add!(self, ")\n"); indent!(self); - self.add_advance_action(&action); + self.add_advance_action(index, &action); if let CharacterSet::Include(chars) = characters { ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); } dedent!(self); } else { self.buffer.truncate(previous_length); - self.add_advance_action(&action); + self.add_advance_action(index, &action); } } @@ -491,11 +491,11 @@ impl Generator { }) } - fn add_advance_action(&mut self, action: &AdvanceAction) { + fn add_advance_action(&mut self, index: usize, action: &AdvanceAction) { if action.in_main_token { - add_line!(self, "ADVANCE({});", action.state); + add_line!(self, "ADVANCE({});", action.state.unwrap_or(index)); } else { - add_line!(self, "SKIP({});", action.state); + add_line!(self, "SKIP({});", action.state.unwrap_or(index)); } } diff --git a/src/tables.rs b/src/tables.rs index f400d25c..c8f7e1e4 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -48,7 +48,7 @@ pub(crate) struct ParseTable { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct AdvanceAction { - pub state: LexStateId, + pub state: Option, pub in_main_token: bool, } From ba96e4961b9710728e6a9ef02be475e2e942d3ca Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 12:42:45 -0800 Subject: [PATCH 097/208] Simplify error handling, finish up LR conflict message generation --- src/build_tables/build_parse_table.rs | 93 +++++++++++++++++++++++++-- src/error.rs | 17 +++-- src/main.rs | 11 +++- src/prepare_grammar/expand_tokens.rs | 9 +-- src/prepare_grammar/extract_tokens.rs | 12 ++-- src/prepare_grammar/intern_symbols.rs | 14 ++-- 6 files changed, 117 insertions(+), 39 deletions(-) diff --git 
a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 5fc015af..e642c3cd 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -455,9 +455,9 @@ impl<'a> ParseTableBuilder<'a> { self.symbol_name(&conflicting_lookahead) ) .unwrap(); - write!(&mut msg, "Possible interpretations:\n").unwrap(); + write!(&mut msg, "Possible interpretations:\n\n").unwrap(); for (i, item) in conflicting_items.iter().enumerate() { - write!(&mut msg, "\n {}:", i).unwrap(); + write!(&mut msg, " {}:", i + 1).unwrap(); for preceding_symbol in preceding_symbols .iter() @@ -501,11 +501,89 @@ impl<'a> ParseTableBuilder<'a> { ) .unwrap(); } + + write!(&mut msg, "\n").unwrap(); } - // TODO - generate suggested resolutions + let mut resolution_count = 0; + write!(&mut msg, "\nPossible resolutions:\n\n").unwrap(); + let shift_items = conflicting_items + .iter() + .filter(|i| !i.is_done()) + .cloned() + .collect::>(); + if shift_items.len() > 0 { + resolution_count += 1; + write!( + &mut msg, + " {}: Specify a higher precedence in", + resolution_count + ) + .unwrap(); + for (i, item) in shift_items.iter().enumerate() { + if i > 0 { + write!(&mut msg, " and").unwrap(); + } + write!( + &mut msg, + " `{}`", + self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + ) + .unwrap(); + } + write!(&mut msg, " than in the other rules.\n").unwrap(); + } - Err(Error::ConflictError(msg)) + if considered_associativity { + resolution_count += 1; + write!( + &mut msg, + " {}: Specify a left or right associativity in ", + resolution_count + ) + .unwrap(); + for (i, item) in conflicting_items.iter().filter(|i| i.is_done()).enumerate() { + if i > 0 { + write!(&mut msg, " and ").unwrap(); + } + write!( + &mut msg, + "{}", + self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + ) + .unwrap(); + } + } + + for item in &conflicting_items { + if item.is_done() { + resolution_count += 1; + write!( + &mut msg, + " {}: 
Specify a higher precedence in `{}` than in the other rules.\n", + resolution_count, + self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + ) + .unwrap(); + } + } + + resolution_count += 1; + write!( + &mut msg, + " {}: Add a conflict for these rules: ", + resolution_count + ) + .unwrap(); + for (i, symbol) in actual_conflict.iter().enumerate() { + if i > 0 { + write!(&mut msg, ", ").unwrap(); + } + write!(&mut msg, "{}", self.symbol_name(symbol)).unwrap(); + } + write!(&mut msg, "\n").unwrap(); + + Err(Error(msg)) } fn get_auxiliary_node_info( @@ -517,8 +595,11 @@ impl<'a> ParseTableBuilder<'a> { .entries .keys() .filter_map(|item| { - if item.symbol() == Some(symbol) { - None + let variable_index = item.variable_index as usize; + if item.symbol() == Some(symbol) + && !self.syntax_grammar.variables[variable_index].is_auxiliary() + { + Some(Symbol::non_terminal(variable_index)) } else { None } diff --git a/src/error.rs b/src/error.rs index b03efa93..9a5801f8 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,25 +1,24 @@ #[derive(Debug)] -pub enum Error { - GrammarError(String), - SymbolError(String), - RegexError(String), - ConflictError(String), -} +pub struct Error(pub String); pub type Result = std::result::Result; impl Error { pub fn grammar(message: &str) -> Self { - Error::GrammarError(message.to_string()) + Error(format!("Grammar error: {}", message)) } pub fn regex(message: &str) -> Self { - Error::RegexError(message.to_string()) + Error(format!("Regex error: {}", message)) + } + + pub fn undefined_symbol(name: &str) -> Self { + Error(format!("Undefined symbol `{}`", name)) } } impl From for Error { fn from(error: serde_json::Error) -> Self { - Error::GrammarError(error.to_string()) + Error(error.to_string()) } } diff --git a/src/main.rs b/src/main.rs index 10820ed1..c3dbf33d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ use clap::{App, Arg, SubCommand}; use std::env; use std::io::Write; use std::path::PathBuf; -use 
std::process::{Command, Stdio}; +use std::process::{exit, Command, Stdio}; mod build_tables; mod error; @@ -25,7 +25,14 @@ mod render; mod rules; mod tables; -fn main() -> error::Result<()> { +fn main() { + if let Err(e) = run() { + eprintln!("{}", e.0); + exit(1); + } +} + +fn run() -> error::Result<()> { let matches = App::new("tree-sitter") .version("0.1") .author("Max Brunsfeld ") diff --git a/src/prepare_grammar/expand_tokens.rs b/src/prepare_grammar/expand_tokens.rs index 91a0e364..2678df19 100644 --- a/src/prepare_grammar/expand_tokens.rs +++ b/src/prepare_grammar/expand_tokens.rs @@ -64,12 +64,7 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { - Error::RegexError(format!("Rule {} {}", variable.name, msg)) - } - _ => e, - })?; + .map_err(|Error(msg)| Error(format!("Rule {} {}", variable.name, msg)))?; if !is_immediate_token { builder.is_sep = true; @@ -97,7 +92,7 @@ impl NfaBuilder { Rule::Pattern(s) => { let ast = parse::Parser::new() .parse(&s) - .map_err(|e| Error::GrammarError(e.to_string()))?; + .map_err(|e| Error(e.to_string()))?; self.expand_regex(&ast, next_state_id) } Rule::String(s) => { diff --git a/src/prepare_grammar/extract_tokens.rs b/src/prepare_grammar/extract_tokens.rs index 115933ee..5a54d34e 100644 --- a/src/prepare_grammar/extract_tokens.rs +++ b/src/prepare_grammar/extract_tokens.rs @@ -89,7 +89,7 @@ pub(super) fn extract_tokens( if let Rule::Symbol(symbol) = rule { let new_symbol = symbol_replacer.replace_symbol(symbol); if new_symbol.is_non_terminal() { - return Err(Error::GrammarError(format!( + return Err(Error(format!( "Non-token symbol '{}' cannot be used as an extra token", &variables[new_symbol.index].name ))); @@ -110,7 +110,7 @@ pub(super) fn extract_tokens( let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule); if let Rule::Symbol(symbol) = rule { if symbol.is_non_terminal() { - return Err(Error::GrammarError(format!( + return Err(Error(format!( "Rule '{}' cannot be used as 
both an external token and a non-terminal rule", &variables[symbol.index].name, ))); @@ -130,7 +130,7 @@ pub(super) fn extract_tokens( }) } } else { - return Err(Error::GrammarError(format!( + return Err(Error(format!( "Non-symbol rules cannot be used as external tokens" ))); } @@ -140,7 +140,7 @@ pub(super) fn extract_tokens( if let Some(token) = grammar.word_token { let token = symbol_replacer.replace_symbol(token); if token.is_non_terminal() { - return Err(Error::GrammarError(format!( + return Err(Error(format!( "Non-terminal symbol '{}' cannot be used as the word token", &variables[token.index].name ))); @@ -475,7 +475,7 @@ mod test { grammar.extra_tokens = vec![Rule::non_terminal(1)]; match extract_tokens(grammar) { - Err(Error::GrammarError(s)) => { + Err(Error(s)) => { assert_eq!( s, "Non-token symbol 'rule_1' cannot be used as an extra token" @@ -503,7 +503,7 @@ mod test { grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))]; match extract_tokens(grammar) { - Err(Error::GrammarError(s)) => { + Err(Error(s)) => { assert_eq!(s, "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule"); } _ => { diff --git a/src/prepare_grammar/intern_symbols.rs b/src/prepare_grammar/intern_symbols.rs index 5165875c..2e6f5b1c 100644 --- a/src/prepare_grammar/intern_symbols.rs +++ b/src/prepare_grammar/intern_symbols.rs @@ -7,7 +7,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result let interner = Interner { grammar }; if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden { - return Err(Error::GrammarError( + return Err(Error( "Grammar's start rule must be visible".to_string(), )); } @@ -44,7 +44,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result interned_conflict.push( interner .intern_name(&name) - .ok_or_else(|| symbol_error(name))?, + .ok_or_else(|| Error::undefined_symbol(name))?, ); } expected_conflicts.push(interned_conflict); @@ -62,7 +62,7 @@ pub(super) fn 
intern_symbols(grammar: &InputGrammar) -> Result word_token = Some( interner .intern_name(&name) - .ok_or_else(|| symbol_error(&name))?, + .ok_or_else(|| Error::undefined_symbol(&name))?, ); } @@ -107,7 +107,7 @@ impl<'a> Interner<'a> { if let Some(symbol) = self.intern_name(&name) { Ok(Rule::Symbol(symbol)) } else { - Err(symbol_error(name)) + Err(Error::undefined_symbol(name)) } } @@ -134,10 +134,6 @@ impl<'a> Interner<'a> { } } -fn symbol_error(name: &str) -> Error { - Error::SymbolError(format!("Undefined symbol '{}'", name)) -} - fn variable_type_for_name(name: &str) -> VariableType { if name.starts_with("_") { VariableType::Hidden @@ -223,7 +219,7 @@ mod tests { let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))])); match result { - Err(Error::SymbolError(message)) => assert_eq!(message, "Undefined symbol 'y'"), + Err(Error(message)) => assert_eq!(message, "Undefined symbol 'y'"), _ => panic!("Expected an error but got none"), } } From a0e65018ba8282fc8c77734092618e87cfb8cf2d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 13:01:07 -0800 Subject: [PATCH 098/208] Fix computation of MAX_ALIAS_SEQUENCE_LENGTH --- src/build_tables/build_parse_table.rs | 6 +++++- src/render/mod.rs | 10 ++-------- src/tables.rs | 1 + 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index e642c3cd..7fb668dd 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -675,6 +675,9 @@ impl<'a> ParseTableBuilder<'a> { while alias_sequence.last() == Some(&None) { alias_sequence.pop(); } + if item.production.steps.len() > self.parse_table.max_aliased_production_length { + self.parse_table.max_aliased_production_length = item.production.steps.len() + } if let Some(index) = self .parse_table .alias_sequences @@ -721,8 +724,9 @@ pub(crate) fn build_parse_table( parse_state_queue: VecDeque::new(), parse_table: 
ParseTable { states: Vec::new(), - alias_sequences: Vec::new(), symbols: Vec::new(), + alias_sequences: Vec::new(), + max_aliased_production_length: 0, }, following_tokens: vec![LookaheadSet::new(); lexical_grammar.variables.len()], } diff --git a/src/render/mod.rs b/src/render/mod.rs index 58235fd9..8d3ee195 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -191,17 +191,11 @@ impl Generator { "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len() ); - if let Some(max_alias_sequence_length) = self - .parse_table - .alias_sequences - .iter() - .map(|seq| seq.len()) - .max() - { + if self.parse_table.max_aliased_production_length > 0 { add_line!( self, "#define MAX_ALIAS_SEQUENCE_LENGTH {}", - max_alias_sequence_length + self.parse_table.max_aliased_production_length ); } add_line!(self, ""); diff --git a/src/tables.rs b/src/tables.rs index c8f7e1e4..edbbaaab 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -44,6 +44,7 @@ pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, pub alias_sequences: Vec>>, + pub max_aliased_production_length: usize, } #[derive(Clone, Debug, PartialEq, Eq)] From 3a727af2645fb41d3f2151d1b1b4893232e49c06 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 15:26:48 -0800 Subject: [PATCH 099/208] Add flag for logging the item set associated with a certain parse state --- src/build_tables/build_parse_table.rs | 43 ++++++++++++++------------- src/build_tables/item.rs | 18 ++++++++++- src/build_tables/mod.rs | 3 +- src/generate.rs | 16 ++++++---- src/main.rs | 15 +++++++++- 5 files changed, 66 insertions(+), 29 deletions(-) diff --git a/src/build_tables/build_parse_table.rs b/src/build_tables/build_parse_table.rs index 7fb668dd..cda1d7ea 100644 --- a/src/build_tables/build_parse_table.rs +++ b/src/build_tables/build_parse_table.rs @@ -39,6 +39,7 @@ struct ParseTableBuilder<'a> { parse_state_queue: VecDeque, parse_table: ParseTable, following_tokens: Vec, + state_ids_to_log: Vec, } 
impl<'a> ParseTableBuilder<'a> { @@ -64,29 +65,26 @@ impl<'a> ParseTableBuilder<'a> { ); while let Some(entry) = self.parse_state_queue.pop_front() { - // info!( - // "state: {}, item set: {}", - // entry.state_id, - // super::item::ParseItemSetDisplay( - // &self.item_sets_by_state_id[entry.state_id], - // self.syntax_grammar, - // self.lexical_grammar, - // ) - // ); - let item_set = self .item_set_builder .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); - // info!( - // "state: {}, closed item set: {}", - // entry.state_id, - // super::item::ParseItemSetDisplay( - // &item_set, - // self.syntax_grammar, - // self.lexical_grammar, - // ) - // ); + if self.state_ids_to_log.contains(&entry.state_id) { + eprintln!( + "state: {}\n\ninitial item set:\n\n{}closed item set:\n\n{}", + entry.state_id, + super::item::ParseItemSetDisplay( + &self.item_sets_by_state_id[entry.state_id], + self.syntax_grammar, + self.lexical_grammar, + ), + super::item::ParseItemSetDisplay( + &item_set, + self.syntax_grammar, + self.lexical_grammar, + ) + ); + } self.add_actions( entry.preceding_symbols, @@ -553,6 +551,7 @@ impl<'a> ParseTableBuilder<'a> { ) .unwrap(); } + write!(&mut msg, "\n").unwrap(); } for item in &conflicting_items { @@ -560,7 +559,7 @@ impl<'a> ParseTableBuilder<'a> { resolution_count += 1; write!( &mut msg, - " {}: Specify a higher precedence in `{}` than in the other rules.\n", + " {}: Specify a higher precedence in `{}` than in the other rules.\n", resolution_count, self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) ) @@ -571,7 +570,7 @@ impl<'a> ParseTableBuilder<'a> { resolution_count += 1; write!( &mut msg, - " {}: Add a conflict for these rules: ", + " {}: Add a conflict for these rules: ", resolution_count ) .unwrap(); @@ -714,10 +713,12 @@ pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, inlines: &InlinedProductionMap, + state_ids_to_log: Vec, ) -> Result<(ParseTable, Vec)> { 
ParseTableBuilder { syntax_grammar, lexical_grammar, + state_ids_to_log, item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), diff --git a/src/build_tables/item.rs b/src/build_tables/item.rs index d1d0cbbf..bbd5bbfa 100644 --- a/src/build_tables/item.rs +++ b/src/build_tables/item.rs @@ -45,7 +45,7 @@ pub(crate) struct ParseItemSet<'a> { pub(crate) struct ParseItemDisplay<'a>( pub &'a ParseItem<'a>, pub &'a SyntaxGrammar, - pub &'a LexicalGrammar + pub &'a LexicalGrammar, ); pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar); @@ -252,6 +252,13 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { for (i, step) in self.0.production.steps.iter().enumerate() { if i == self.0.step_index as usize { write!(f, " •")?; + if step.precedence != 0 || step.associativity.is_some() { + write!( + f, + " (prec {:?} assoc {:?})", + step.precedence, step.associativity + )?; + } } write!(f, " ")?; @@ -274,6 +281,15 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if self.0.is_done() { write!(f, " •")?; + if let Some(step) = self.0.production.steps.last() { + if step.precedence != 0 || step.associativity.is_some() { + write!( + f, + " (prec {:?} assoc {:?})", + step.precedence, step.associativity + )?; + } + } } Ok(()) diff --git a/src/build_tables/mod.rs b/src/build_tables/mod.rs index ed47665e..04b750e3 100644 --- a/src/build_tables/mod.rs +++ b/src/build_tables/mod.rs @@ -24,9 +24,10 @@ pub(crate) fn build_tables( simple_aliases: &AliasMap, inlines: &InlinedProductionMap, minimize: bool, + state_ids_to_log: Vec, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { let (mut parse_table, following_tokens) = - build_parse_table(syntax_grammar, lexical_grammar, inlines)?; + build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, 
following_tokens); let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar); let keywords = identify_keywords( diff --git a/src/generate.rs b/src/generate.rs index d574c165..aa8f3b5b 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -1,18 +1,24 @@ +use crate::build_tables::build_tables; use crate::error::Result; use crate::parse_grammar::parse_grammar; use crate::prepare_grammar::prepare_grammar; -use crate::build_tables::build_tables; use crate::render::render_c_code; -pub fn generate_parser_for_grammar(input: &str, minimize: bool) -> Result { +pub fn generate_parser_for_grammar( + input: &str, + minimize: bool, + state_ids_to_log: Vec, +) -> Result { let input_grammar = parse_grammar(input)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(&input_grammar)?; let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, &inlines, - minimize + minimize, + state_ids_to_log, )?; let c_code = render_c_code( &input_grammar.name, @@ -22,7 +28,7 @@ pub fn generate_parser_for_grammar(input: &str, minimize: bool) -> Result error::Result<()> { SubCommand::with_name("generate") .about("Generate a parser") .arg(Arg::with_name("log").long("log")) + .arg( + Arg::with_name("state-ids-to-log") + .long("log-state") + .takes_value(true), + ) .arg(Arg::with_name("no-minimize").long("no-minimize")), ) .subcommand( @@ -63,10 +69,17 @@ fn run() -> error::Result<()> { } let minimize = !matches.is_present("no-minimize"); + let state_ids_to_log = matches + .values_of("state-ids-to-log") + .map_or(Vec::new(), |ids| { + ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) + .collect() + }); let mut grammar_path = env::current_dir().expect("Failed to read CWD"); grammar_path.push("grammar.js"); let grammar_json = 
load_js_grammar_file(grammar_path); - let code = generate::generate_parser_for_grammar(&grammar_json, minimize)?; + let code = + generate::generate_parser_for_grammar(&grammar_json, minimize, state_ids_to_log)?; println!("{}", code); } From d8f8bd288eece27626c02407054b454b8102b7f8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 15:27:15 -0800 Subject: [PATCH 100/208] Fix error in code generation w/ tokens that are internal and external --- src/render/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/render/mod.rs b/src/render/mod.rs index 8d3ee195..36429848 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -561,11 +561,13 @@ impl Generator { ); indent!(self); for i in 0..self.syntax_grammar.external_tokens.len() { + let token = &self.syntax_grammar.external_tokens[i]; + let id_token = token.corresponding_internal_token.unwrap_or(Symbol::external(i)); add_line!( self, "[{}] = {},", - self.external_token_id(&self.syntax_grammar.external_tokens[i]), - self.symbol_ids[&Symbol::external(i)], + self.external_token_id(&token), + self.symbol_ids[&id_token], ); } dedent!(self); From b8dd5d2640f2011d016d0dfd750e804824771c68 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 15:27:35 -0800 Subject: [PATCH 101/208] Fix handling of precedence and associativity with inlining --- src/prepare_grammar/process_inlines.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/prepare_grammar/process_inlines.rs b/src/prepare_grammar/process_inlines.rs index 9fd2f2c6..557b0fa4 100644 --- a/src/prepare_grammar/process_inlines.rs +++ b/src/prepare_grammar/process_inlines.rs @@ -90,7 +90,6 @@ impl InlinedProductionMapBuilder { while i < productions_to_add.len() { if let Some(step) = productions_to_add[i].steps.get(step_index) { let symbol = step.symbol.clone(); - if grammar.variables_to_inline.contains(&symbol) { // Remove the production from the vector, replacing it with a placeholder. 
let production = productions_to_add @@ -116,8 +115,12 @@ impl InlinedProductionMapBuilder { } } if let Some(last_inserted_step) = inserted_steps.last_mut() { - last_inserted_step.precedence = removed_step.precedence; - last_inserted_step.associativity = removed_step.associativity; + if last_inserted_step.precedence == 0 { + last_inserted_step.precedence = removed_step.precedence; + } + if last_inserted_step.associativity == None { + last_inserted_step.associativity = removed_step.associativity; + } } production }), From 5b0e12ea332ebe231ba103b078f832f2ee2148c5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 16:50:52 -0800 Subject: [PATCH 102/208] Move code into cli directory --- Cargo.toml | 31 +++---------------- cli/Cargo.toml | 27 ++++++++++++++++ .../src}/build_tables/build_lex_table.rs | 0 .../src}/build_tables/build_parse_table.rs | 0 .../src}/build_tables/coincident_tokens.rs | 0 {src => cli/src}/build_tables/item.rs | 0 .../src}/build_tables/item_set_builder.rs | 0 .../src}/build_tables/minimize_parse_table.rs | 0 {src => cli/src}/build_tables/mod.rs | 0 .../src}/build_tables/token_conflicts.rs | 0 {src => cli/src}/error.rs | 0 {src => cli/src}/generate.rs | 0 {src => cli/src}/grammars.rs | 0 {src => cli/src}/js/dsl.js | 0 {src => cli/src}/logger.rs | 0 {src => cli/src}/main.rs | 0 {src => cli/src}/nfa.rs | 0 {src => cli/src}/parse_grammar.rs | 0 .../src}/prepare_grammar/expand_repeats.rs | 0 .../src}/prepare_grammar/expand_tokens.rs | 0 .../prepare_grammar/extract_simple_aliases.rs | 0 .../src}/prepare_grammar/extract_tokens.rs | 0 .../src}/prepare_grammar/flatten_grammar.rs | 0 .../src}/prepare_grammar/intern_symbols.rs | 0 {src => cli/src}/prepare_grammar/mod.rs | 0 .../src}/prepare_grammar/process_inlines.rs | 0 {src => cli/src}/render/mod.rs | 0 {src => cli/src}/rules.rs | 0 {src => cli/src}/tables.rs | 0 29 files changed, 32 insertions(+), 26 deletions(-) create mode 100644 cli/Cargo.toml rename {src => 
cli/src}/build_tables/build_lex_table.rs (100%) rename {src => cli/src}/build_tables/build_parse_table.rs (100%) rename {src => cli/src}/build_tables/coincident_tokens.rs (100%) rename {src => cli/src}/build_tables/item.rs (100%) rename {src => cli/src}/build_tables/item_set_builder.rs (100%) rename {src => cli/src}/build_tables/minimize_parse_table.rs (100%) rename {src => cli/src}/build_tables/mod.rs (100%) rename {src => cli/src}/build_tables/token_conflicts.rs (100%) rename {src => cli/src}/error.rs (100%) rename {src => cli/src}/generate.rs (100%) rename {src => cli/src}/grammars.rs (100%) rename {src => cli/src}/js/dsl.js (100%) rename {src => cli/src}/logger.rs (100%) rename {src => cli/src}/main.rs (100%) rename {src => cli/src}/nfa.rs (100%) rename {src => cli/src}/parse_grammar.rs (100%) rename {src => cli/src}/prepare_grammar/expand_repeats.rs (100%) rename {src => cli/src}/prepare_grammar/expand_tokens.rs (100%) rename {src => cli/src}/prepare_grammar/extract_simple_aliases.rs (100%) rename {src => cli/src}/prepare_grammar/extract_tokens.rs (100%) rename {src => cli/src}/prepare_grammar/flatten_grammar.rs (100%) rename {src => cli/src}/prepare_grammar/intern_symbols.rs (100%) rename {src => cli/src}/prepare_grammar/mod.rs (100%) rename {src => cli/src}/prepare_grammar/process_inlines.rs (100%) rename {src => cli/src}/render/mod.rs (100%) rename {src => cli/src}/rules.rs (100%) rename {src => cli/src}/tables.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 29b10e17..75d3b403 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,27 +1,6 @@ -[package] -name = "rust-tree-sitter-cli" -version = "0.1.0" -authors = ["Max Brunsfeld "] -edition = "2018" +[workspace] -[dependencies] -lazy_static = "1.2.0" -smallbitvec = "2.3.0" -clap = "2.32" -dirs = "1.0.2" -hashbrown = "0.1" -ignore = "0.4.4" -libloading = "0.5" -rusqlite = "0.14.0" -serde = "1.0" -serde_derive = "1.0" -tree-sitter = "0.3.1" -regex-syntax = "0.6.4" - -[dependencies.serde_json] -version = "1.0" 
-features = ["preserve_order"] - -[dependencies.log] -version = "0.4.6" -features = ["std"] +members = [ + "cli", + "lib", +] diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 00000000..29b10e17 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "rust-tree-sitter-cli" +version = "0.1.0" +authors = ["Max Brunsfeld "] +edition = "2018" + +[dependencies] +lazy_static = "1.2.0" +smallbitvec = "2.3.0" +clap = "2.32" +dirs = "1.0.2" +hashbrown = "0.1" +ignore = "0.4.4" +libloading = "0.5" +rusqlite = "0.14.0" +serde = "1.0" +serde_derive = "1.0" +tree-sitter = "0.3.1" +regex-syntax = "0.6.4" + +[dependencies.serde_json] +version = "1.0" +features = ["preserve_order"] + +[dependencies.log] +version = "0.4.6" +features = ["std"] diff --git a/src/build_tables/build_lex_table.rs b/cli/src/build_tables/build_lex_table.rs similarity index 100% rename from src/build_tables/build_lex_table.rs rename to cli/src/build_tables/build_lex_table.rs diff --git a/src/build_tables/build_parse_table.rs b/cli/src/build_tables/build_parse_table.rs similarity index 100% rename from src/build_tables/build_parse_table.rs rename to cli/src/build_tables/build_parse_table.rs diff --git a/src/build_tables/coincident_tokens.rs b/cli/src/build_tables/coincident_tokens.rs similarity index 100% rename from src/build_tables/coincident_tokens.rs rename to cli/src/build_tables/coincident_tokens.rs diff --git a/src/build_tables/item.rs b/cli/src/build_tables/item.rs similarity index 100% rename from src/build_tables/item.rs rename to cli/src/build_tables/item.rs diff --git a/src/build_tables/item_set_builder.rs b/cli/src/build_tables/item_set_builder.rs similarity index 100% rename from src/build_tables/item_set_builder.rs rename to cli/src/build_tables/item_set_builder.rs diff --git a/src/build_tables/minimize_parse_table.rs b/cli/src/build_tables/minimize_parse_table.rs similarity index 100% rename from src/build_tables/minimize_parse_table.rs rename to 
cli/src/build_tables/minimize_parse_table.rs diff --git a/src/build_tables/mod.rs b/cli/src/build_tables/mod.rs similarity index 100% rename from src/build_tables/mod.rs rename to cli/src/build_tables/mod.rs diff --git a/src/build_tables/token_conflicts.rs b/cli/src/build_tables/token_conflicts.rs similarity index 100% rename from src/build_tables/token_conflicts.rs rename to cli/src/build_tables/token_conflicts.rs diff --git a/src/error.rs b/cli/src/error.rs similarity index 100% rename from src/error.rs rename to cli/src/error.rs diff --git a/src/generate.rs b/cli/src/generate.rs similarity index 100% rename from src/generate.rs rename to cli/src/generate.rs diff --git a/src/grammars.rs b/cli/src/grammars.rs similarity index 100% rename from src/grammars.rs rename to cli/src/grammars.rs diff --git a/src/js/dsl.js b/cli/src/js/dsl.js similarity index 100% rename from src/js/dsl.js rename to cli/src/js/dsl.js diff --git a/src/logger.rs b/cli/src/logger.rs similarity index 100% rename from src/logger.rs rename to cli/src/logger.rs diff --git a/src/main.rs b/cli/src/main.rs similarity index 100% rename from src/main.rs rename to cli/src/main.rs diff --git a/src/nfa.rs b/cli/src/nfa.rs similarity index 100% rename from src/nfa.rs rename to cli/src/nfa.rs diff --git a/src/parse_grammar.rs b/cli/src/parse_grammar.rs similarity index 100% rename from src/parse_grammar.rs rename to cli/src/parse_grammar.rs diff --git a/src/prepare_grammar/expand_repeats.rs b/cli/src/prepare_grammar/expand_repeats.rs similarity index 100% rename from src/prepare_grammar/expand_repeats.rs rename to cli/src/prepare_grammar/expand_repeats.rs diff --git a/src/prepare_grammar/expand_tokens.rs b/cli/src/prepare_grammar/expand_tokens.rs similarity index 100% rename from src/prepare_grammar/expand_tokens.rs rename to cli/src/prepare_grammar/expand_tokens.rs diff --git a/src/prepare_grammar/extract_simple_aliases.rs b/cli/src/prepare_grammar/extract_simple_aliases.rs similarity index 100% rename 
from src/prepare_grammar/extract_simple_aliases.rs rename to cli/src/prepare_grammar/extract_simple_aliases.rs diff --git a/src/prepare_grammar/extract_tokens.rs b/cli/src/prepare_grammar/extract_tokens.rs similarity index 100% rename from src/prepare_grammar/extract_tokens.rs rename to cli/src/prepare_grammar/extract_tokens.rs diff --git a/src/prepare_grammar/flatten_grammar.rs b/cli/src/prepare_grammar/flatten_grammar.rs similarity index 100% rename from src/prepare_grammar/flatten_grammar.rs rename to cli/src/prepare_grammar/flatten_grammar.rs diff --git a/src/prepare_grammar/intern_symbols.rs b/cli/src/prepare_grammar/intern_symbols.rs similarity index 100% rename from src/prepare_grammar/intern_symbols.rs rename to cli/src/prepare_grammar/intern_symbols.rs diff --git a/src/prepare_grammar/mod.rs b/cli/src/prepare_grammar/mod.rs similarity index 100% rename from src/prepare_grammar/mod.rs rename to cli/src/prepare_grammar/mod.rs diff --git a/src/prepare_grammar/process_inlines.rs b/cli/src/prepare_grammar/process_inlines.rs similarity index 100% rename from src/prepare_grammar/process_inlines.rs rename to cli/src/prepare_grammar/process_inlines.rs diff --git a/src/render/mod.rs b/cli/src/render/mod.rs similarity index 100% rename from src/render/mod.rs rename to cli/src/render/mod.rs diff --git a/src/rules.rs b/cli/src/rules.rs similarity index 100% rename from src/rules.rs rename to cli/src/rules.rs diff --git a/src/tables.rs b/cli/src/tables.rs similarity index 100% rename from src/tables.rs rename to cli/src/tables.rs From dd416b09552fd5b09313072fb13452dd2a8d8fc0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Jan 2019 17:33:34 -0800 Subject: [PATCH 103/208] Update include paths to not reference 'runtime' directory --- lib/src/array.h | 2 +- lib/src/get_changed_ranges.c | 10 +++++----- lib/src/get_changed_ranges.h | 4 ++-- lib/src/language.c | 6 +++--- lib/src/language.h | 2 +- lib/src/length.h | 2 +- lib/src/lexer.c | 8 ++++---- lib/src/lexer.h | 
4 ++-- lib/src/node.c | 6 +++--- lib/src/parser.c | 24 ++++++++++++------------ lib/src/reduce_action.h | 2 +- lib/src/reusable_node.h | 2 +- lib/src/runtime.c | 20 ++++++++++---------- lib/src/stack.c | 12 ++++++------ lib/src/stack.h | 6 +++--- lib/src/subtree.c | 12 ++++++------ lib/src/subtree.h | 6 +++--- lib/src/tree.c | 10 +++++----- lib/src/tree_cursor.c | 8 ++++---- lib/src/tree_cursor.h | 2 +- lib/src/utf16.c | 2 +- script/build-runtime | 14 +++++++------- 22 files changed, 82 insertions(+), 82 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index 60cfc800..3f5b6b29 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -10,7 +10,7 @@ extern "C" { #include #include #include -#include "runtime/alloc.h" +#include "./alloc.h" #define Array(T) \ struct { \ diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 900d36ed..da39dd13 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -1,8 +1,8 @@ -#include "runtime/get_changed_ranges.h" -#include "runtime/subtree.h" -#include "runtime/language.h" -#include "runtime/error_costs.h" -#include "runtime/tree_cursor.h" +#include "./get_changed_ranges.h" +#include "./subtree.h" +#include "./language.h" +#include "./error_costs.h" +#include "./tree_cursor.h" #include // #define DEBUG_GET_CHANGED_RANGES diff --git a/lib/src/get_changed_ranges.h b/lib/src/get_changed_ranges.h index e7fcead1..2764b55f 100644 --- a/lib/src/get_changed_ranges.h +++ b/lib/src/get_changed_ranges.h @@ -5,8 +5,8 @@ extern "C" { #endif -#include "runtime/tree_cursor.h" -#include "runtime/subtree.h" +#include "./tree_cursor.h" +#include "./subtree.h" typedef Array(TSRange) TSRangeArray; diff --git a/lib/src/language.c b/lib/src/language.c index 0fb03b6c..9541bba2 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -1,6 +1,6 @@ -#include "runtime/language.h" -#include "runtime/subtree.h" -#include "runtime/error_costs.h" +#include "./language.h" +#include "./subtree.h" +#include 
"./error_costs.h" #include void ts_language_table_entry(const TSLanguage *self, TSStateId state, diff --git a/lib/src/language.h b/lib/src/language.h index 8386a054..c8e5e8a1 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -5,7 +5,7 @@ extern "C" { #endif -#include "runtime/subtree.h" +#include "./subtree.h" #include "tree_sitter/parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) diff --git a/lib/src/length.h b/lib/src/length.h index 8dd1715e..db325f7a 100644 --- a/lib/src/length.h +++ b/lib/src/length.h @@ -3,7 +3,7 @@ #include #include -#include "runtime/point.h" +#include "./point.h" #include "tree_sitter/runtime.h" typedef struct { diff --git a/lib/src/lexer.c b/lib/src/lexer.c index d2b9ad70..b33da344 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -1,8 +1,8 @@ #include -#include "runtime/lexer.h" -#include "runtime/subtree.h" -#include "runtime/length.h" -#include "runtime/utf16.h" +#include "./lexer.h" +#include "./subtree.h" +#include "./length.h" +#include "./utf16.h" #include "utf8proc.h" #define LOG(...) 
\ diff --git a/lib/src/lexer.h b/lib/src/lexer.h index 491c2da1..327350f6 100644 --- a/lib/src/lexer.h +++ b/lib/src/lexer.h @@ -5,8 +5,8 @@ extern "C" { #endif -#include "runtime/length.h" -#include "runtime/subtree.h" +#include "./length.h" +#include "./subtree.h" #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" diff --git a/lib/src/node.c b/lib/src/node.c index c1763261..eb4a3121 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -1,7 +1,7 @@ #include -#include "runtime/subtree.h" -#include "runtime/tree.h" -#include "runtime/language.h" +#include "./subtree.h" +#include "./tree.h" +#include "./language.h" typedef struct { Subtree parent; diff --git a/lib/src/parser.c b/lib/src/parser.c index c7050ce5..ef7f612d 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -3,18 +3,18 @@ #include #include #include "tree_sitter/runtime.h" -#include "runtime/subtree.h" -#include "runtime/lexer.h" -#include "runtime/length.h" -#include "runtime/array.h" -#include "runtime/language.h" -#include "runtime/alloc.h" -#include "runtime/stack.h" -#include "runtime/reusable_node.h" -#include "runtime/reduce_action.h" -#include "runtime/error_costs.h" -#include "runtime/get_changed_ranges.h" -#include "runtime/tree.h" +#include "./subtree.h" +#include "./lexer.h" +#include "./length.h" +#include "./array.h" +#include "./language.h" +#include "./alloc.h" +#include "./stack.h" +#include "./reusable_node.h" +#include "./reduce_action.h" +#include "./error_costs.h" +#include "./get_changed_ranges.h" +#include "./tree.h" #define LOG(...) 
\ if (self->lexer.logger.log || self->dot_graph_file) { \ diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 75267c3f..91835c39 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -5,7 +5,7 @@ extern "C" { #endif -#include "runtime/array.h" +#include "./array.h" #include "tree_sitter/runtime.h" typedef struct { diff --git a/lib/src/reusable_node.h b/lib/src/reusable_node.h index cb9cea58..ab91cb36 100644 --- a/lib/src/reusable_node.h +++ b/lib/src/reusable_node.h @@ -1,4 +1,4 @@ -#include "runtime/subtree.h" +#include "./subtree.h" typedef struct { Subtree tree; diff --git a/lib/src/runtime.c b/lib/src/runtime.c index 51455a8b..b29f5214 100644 --- a/lib/src/runtime.c +++ b/lib/src/runtime.c @@ -6,14 +6,14 @@ // - include // - externals/utf8proc -#include "runtime/get_changed_ranges.c" -#include "runtime/language.c" -#include "runtime/lexer.c" -#include "runtime/node.c" -#include "runtime/parser.c" -#include "runtime/stack.c" -#include "runtime/subtree.c" -#include "runtime/tree_cursor.c" -#include "runtime/tree.c" -#include "runtime/utf16.c" +#include "./get_changed_ranges.c" +#include "./language.c" +#include "./lexer.c" +#include "./node.c" +#include "./parser.c" +#include "./stack.c" +#include "./subtree.c" +#include "./tree_cursor.c" +#include "./tree.c" +#include "./utf16.c" #include "utf8proc.c" diff --git a/lib/src/stack.c b/lib/src/stack.c index cc434e38..e3a1f22d 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -1,9 +1,9 @@ -#include "runtime/alloc.h" -#include "runtime/language.h" -#include "runtime/subtree.h" -#include "runtime/array.h" -#include "runtime/stack.h" -#include "runtime/length.h" +#include "./alloc.h" +#include "./language.h" +#include "./subtree.h" +#include "./array.h" +#include "./stack.h" +#include "./length.h" #include #include diff --git a/lib/src/stack.h b/lib/src/stack.h index 272bb4ee..d476d763 100644 --- a/lib/src/stack.h +++ b/lib/src/stack.h @@ -5,9 +5,9 @@ extern "C" { #endif -#include 
"runtime/array.h" -#include "runtime/subtree.h" -#include "runtime/error_costs.h" +#include "./array.h" +#include "./subtree.h" +#include "./error_costs.h" #include typedef struct Stack Stack; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index eb7e0530..48c8cff3 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -4,12 +4,12 @@ #include #include #include -#include "runtime/alloc.h" -#include "runtime/atomic.h" -#include "runtime/subtree.h" -#include "runtime/length.h" -#include "runtime/language.h" -#include "runtime/error_costs.h" +#include "./alloc.h" +#include "./atomic.h" +#include "./subtree.h" +#include "./length.h" +#include "./language.h" +#include "./error_costs.h" #include typedef struct { diff --git a/lib/src/subtree.h b/lib/src/subtree.h index de3ddc16..cc5c79aa 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -7,9 +7,9 @@ extern "C" { #include #include -#include "runtime/length.h" -#include "runtime/array.h" -#include "runtime/error_costs.h" +#include "./length.h" +#include "./array.h" +#include "./error_costs.h" #include "tree_sitter/runtime.h" #include "tree_sitter/parser.h" diff --git a/lib/src/tree.c b/lib/src/tree.c index e5122cc1..b729c8c7 100644 --- a/lib/src/tree.c +++ b/lib/src/tree.c @@ -1,9 +1,9 @@ #include "tree_sitter/runtime.h" -#include "runtime/array.h" -#include "runtime/get_changed_ranges.h" -#include "runtime/subtree.h" -#include "runtime/tree_cursor.h" -#include "runtime/tree.h" +#include "./array.h" +#include "./get_changed_ranges.h" +#include "./subtree.h" +#include "./tree_cursor.h" +#include "./tree.h" static const unsigned PARENT_CACHE_CAPACITY = 32; diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 9fce48be..d352c32b 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -1,8 +1,8 @@ #include "tree_sitter/runtime.h" -#include "runtime/alloc.h" -#include "runtime/tree_cursor.h" -#include "runtime/language.h" -#include "runtime/tree.h" +#include "./alloc.h" +#include 
"./tree_cursor.h" +#include "./language.h" +#include "./tree.h" typedef struct { Subtree parent; diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 84300b21..6e46b7dd 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -1,7 +1,7 @@ #ifndef RUNTIME_TREE_CURSOR_H_ #define RUNTIME_TREE_CURSOR_H_ -#include "runtime/subtree.h" +#include "./subtree.h" typedef struct { const Subtree *subtree; diff --git a/lib/src/utf16.c b/lib/src/utf16.c index adb82edf..3956c01c 100644 --- a/lib/src/utf16.c +++ b/lib/src/utf16.c @@ -1,4 +1,4 @@ -#include "runtime/utf16.h" +#include "./utf16.h" utf8proc_ssize_t utf16_iterate( const utf8proc_uint8_t *string, diff --git a/script/build-runtime b/script/build-runtime index 9e09b836..7b2e99f2 100755 --- a/script/build-runtime +++ b/script/build-runtime @@ -11,11 +11,11 @@ ${CC} \ -c \ -O3 \ -std=c99 \ - -I src \ - -I include \ - -I externals/utf8proc \ - src/runtime/runtime.c \ - -o runtime.o + -I lib/src \ + -I lib/include \ + -I lib/utf8proc \ + lib/src/runtime.c \ + -o tree-sitter.o -ar rcs libruntime.a runtime.o -rm runtime.o +ar rcs libtree-sitter.a tree-sitter.o +rm tree-sitter.o From 001f8c8f55a2a9a4c14c522ff12fcf27ae04c1e1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 08:39:47 -0800 Subject: [PATCH 104/208] Rename LookaheadSet -> TokenSet Also, replace non-standard `with` method with a `FromIterator` implementation. 
--- cli/src/build_tables/build_lex_table.rs | 32 ++-- cli/src/build_tables/build_parse_table.rs | 16 +- cli/src/build_tables/item.rs | 39 +++-- cli/src/build_tables/item_set_builder.rs | 34 ++-- cli/src/build_tables/minimize_parse_table.rs | 6 +- cli/src/build_tables/mod.rs | 170 ++++++++++--------- cli/src/build_tables/token_conflicts.rs | 18 +- 7 files changed, 165 insertions(+), 150 deletions(-) diff --git a/cli/src/build_tables/build_lex_table.rs b/cli/src/build_tables/build_lex_table.rs index bcc1bf3d..9fc8edc6 100644 --- a/cli/src/build_tables/build_lex_table.rs +++ b/cli/src/build_tables/build_lex_table.rs @@ -1,4 +1,4 @@ -use super::item::LookaheadSet; +use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; @@ -11,7 +11,7 @@ pub(crate) fn build_lex_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - keywords: &LookaheadSet, + keywords: &TokenSet, minimize: bool, ) -> (LexTable, LexTable) { let keyword_lex_table; @@ -25,19 +25,23 @@ pub(crate) fn build_lex_table( let mut builder = LexTableBuilder::new(lexical_grammar); for state in parse_table.states.iter_mut() { - let tokens = LookaheadSet::with(state.terminal_entries.keys().filter_map(|token| { - if token.is_terminal() { - if keywords.contains(&token) { - syntax_grammar.word_token - } else { + let tokens = state + .terminal_entries + .keys() + .filter_map(|token| { + if token.is_terminal() { + if keywords.contains(&token) { + syntax_grammar.word_token + } else { + Some(*token) + } + } else if token.is_eof() { Some(*token) + } else { + None } - } else if token.is_eof() { - Some(*token) - } else { - None - } - })); + }) + .collect(); state.lex_state_id = builder.add_state_for_tokens(&tokens); } @@ -75,7 +79,7 @@ impl<'a> LexTableBuilder<'a> { } } - fn add_state_for_tokens(&mut self, tokens: &LookaheadSet) -> usize { + fn 
add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize { let mut eof_valid = false; let nfa_states = tokens .iter() diff --git a/cli/src/build_tables/build_parse_table.rs b/cli/src/build_tables/build_parse_table.rs index cda1d7ea..27baf146 100644 --- a/cli/src/build_tables/build_parse_table.rs +++ b/cli/src/build_tables/build_parse_table.rs @@ -1,4 +1,4 @@ -use super::item::{LookaheadSet, ParseItem, ParseItemSet}; +use super::item::{ParseItem, ParseItemSet, TokenSet}; use super::item_set_builder::ParseItemSetBuilder; use crate::error::{Error, Result}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; @@ -38,12 +38,12 @@ struct ParseTableBuilder<'a> { item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, parse_table: ParseTable, - following_tokens: Vec, + following_tokens: Vec, state_ids_to_log: Vec, } impl<'a> ParseTableBuilder<'a> { - fn build(mut self) -> Result<(ParseTable, Vec)> { + fn build(mut self) -> Result<(ParseTable, Vec)> { // Ensure that the empty alias sequence has index 0. 
self.parse_table.alias_sequences.push(Vec::new()); @@ -57,7 +57,7 @@ impl<'a> ParseTableBuilder<'a> { ParseItemSet::with( [( ParseItem::start(), - LookaheadSet::with([Symbol::end()].iter().cloned()), + [Symbol::end()].iter().cloned().collect(), )] .iter() .cloned(), @@ -174,7 +174,7 @@ impl<'a> ParseTableBuilder<'a> { .or_insert_with(|| ParseItemSet::default()) .entries .entry(successor) - .or_insert_with(|| LookaheadSet::new()) + .or_insert_with(|| TokenSet::new()) .insert_all(lookaheads); } else { terminal_successors @@ -182,7 +182,7 @@ impl<'a> ParseTableBuilder<'a> { .or_insert_with(|| ParseItemSet::default()) .entries .entry(successor) - .or_insert_with(|| LookaheadSet::new()) + .or_insert_with(|| TokenSet::new()) .insert_all(lookaheads); } } else { @@ -714,7 +714,7 @@ pub(crate) fn build_parse_table( lexical_grammar: &LexicalGrammar, inlines: &InlinedProductionMap, state_ids_to_log: Vec, -) -> Result<(ParseTable, Vec)> { +) -> Result<(ParseTable, Vec)> { ParseTableBuilder { syntax_grammar, lexical_grammar, @@ -729,7 +729,7 @@ pub(crate) fn build_parse_table( alias_sequences: Vec::new(), max_aliased_production_length: 0, }, - following_tokens: vec![LookaheadSet::new(); lexical_grammar.variables.len()], + following_tokens: vec![TokenSet::new(); lexical_grammar.variables.len()], } .build() } diff --git a/cli/src/build_tables/item.rs b/cli/src/build_tables/item.rs index bbd5bbfa..5d6edc2f 100644 --- a/cli/src/build_tables/item.rs +++ b/cli/src/build_tables/item.rs @@ -6,6 +6,7 @@ use std::cmp::Ordering; use std::collections::BTreeMap; use std::fmt; use std::hash::{Hash, Hasher}; +use std::iter::FromIterator; use std::u32; lazy_static! { @@ -24,7 +25,7 @@ lazy_static! 
{ } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct LookaheadSet { +pub(crate) struct TokenSet { terminal_bits: SmallBitVec, external_bits: SmallBitVec, eof: bool, @@ -39,7 +40,7 @@ pub(crate) struct ParseItem<'a> { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseItemSet<'a> { - pub entries: BTreeMap, LookaheadSet>, + pub entries: BTreeMap, TokenSet>, } pub(crate) struct ParseItemDisplay<'a>( @@ -48,7 +49,7 @@ pub(crate) struct ParseItemDisplay<'a>( pub &'a LexicalGrammar, ); -pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar); +pub(crate) struct TokenSetDisplay<'a>(&'a TokenSet, &'a SyntaxGrammar, &'a LexicalGrammar); #[allow(dead_code)] pub(crate) struct ParseItemSetDisplay<'a>( @@ -57,7 +58,7 @@ pub(crate) struct ParseItemSetDisplay<'a>( pub &'a LexicalGrammar, ); -impl LookaheadSet { +impl TokenSet { pub fn new() -> Self { Self { terminal_bits: SmallBitVec::new(), @@ -92,17 +93,9 @@ impl LookaheadSet { .chain(if self.eof { Some(Symbol::end()) } else { None }) } - pub fn with(symbols: impl IntoIterator) -> Self { - let mut result = Self::new(); - for symbol in symbols { - result.insert(symbol); - } - result - } - pub fn contains(&self, symbol: &Symbol) -> bool { match symbol.kind { - SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"), + SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false), SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false), SymbolType::End => self.eof, @@ -111,7 +104,7 @@ impl LookaheadSet { pub fn insert(&mut self, other: Symbol) { let vec = match other.kind { - SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"), + SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), SymbolType::Terminal => &mut self.terminal_bits, SymbolType::External => &mut 
self.external_bits, SymbolType::End => { @@ -125,7 +118,7 @@ impl LookaheadSet { vec.set(other.index, true); } - pub fn insert_all(&mut self, other: &LookaheadSet) -> bool { + pub fn insert_all(&mut self, other: &TokenSet) -> bool { let mut result = false; if other.terminal_bits.len() > self.terminal_bits.len() { self.terminal_bits.resize(other.terminal_bits.len(), false); @@ -153,6 +146,16 @@ impl LookaheadSet { } } +impl FromIterator for TokenSet { + fn from_iter>(iter: T) -> Self { + let mut result = Self::new(); + for symbol in iter { + result.insert(symbol); + } + result + } +} + impl<'a> ParseItem<'a> { pub fn start() -> Self { ParseItem { @@ -204,7 +207,7 @@ impl<'a> ParseItem<'a> { } impl<'a> ParseItemSet<'a> { - pub fn with(elements: impl IntoIterator, LookaheadSet)>) -> Self { + pub fn with(elements: impl IntoIterator, TokenSet)>) -> Self { let mut result = Self::default(); for (item, lookaheads) in elements { result.entries.insert(item, lookaheads); @@ -296,7 +299,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { } } -impl<'a> fmt::Display for LookaheadSetDisplay<'a> { +impl<'a> fmt::Display for TokenSetDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "[")?; for (i, symbol) in self.0.iter().enumerate() { @@ -328,7 +331,7 @@ impl<'a> fmt::Display for ParseItemSetDisplay<'a> { f, "{}\t{}", ParseItemDisplay(item, self.1, self.2), - LookaheadSetDisplay(lookaheads, self.1, self.2) + TokenSetDisplay(lookaheads, self.1, self.2) )?; } Ok(()) diff --git a/cli/src/build_tables/item_set_builder.rs b/cli/src/build_tables/item_set_builder.rs index 939d700c..fea3b4d1 100644 --- a/cli/src/build_tables/item_set_builder.rs +++ b/cli/src/build_tables/item_set_builder.rs @@ -1,4 +1,4 @@ -use super::item::{LookaheadSet, ParseItem, ParseItemDisplay, ParseItemSet}; +use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet}; use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use 
crate::rules::Symbol; use hashbrown::{HashMap, HashSet}; @@ -12,15 +12,15 @@ struct TransitiveClosureAddition<'a> { #[derive(Clone, Debug, PartialEq, Eq)] struct FollowSetInfo { - lookaheads: LookaheadSet, + lookaheads: TokenSet, propagates_lookaheads: bool, } pub(crate) struct ParseItemSetBuilder<'a> { syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - first_sets: HashMap, - last_sets: HashMap, + first_sets: HashMap, + last_sets: HashMap, inlines: &'a InlinedProductionMap, transitive_closure_additions: Vec>>, } @@ -54,7 +54,7 @@ impl<'a> ParseItemSetBuilder<'a> { // terminal itself. for i in 0..lexical_grammar.variables.len() { let symbol = Symbol::terminal(i); - let mut set = LookaheadSet::new(); + let mut set = TokenSet::new(); set.insert(symbol); result.first_sets.insert(symbol, set.clone()); result.last_sets.insert(symbol, set); @@ -62,7 +62,7 @@ impl<'a> ParseItemSetBuilder<'a> { for i in 0..syntax_grammar.external_tokens.len() { let symbol = Symbol::external(i); - let mut set = LookaheadSet::new(); + let mut set = TokenSet::new(); set.insert(symbol); result.first_sets.insert(symbol, set.clone()); result.last_sets.insert(symbol, set); @@ -80,10 +80,7 @@ impl<'a> ParseItemSetBuilder<'a> { for i in 0..syntax_grammar.variables.len() { let symbol = Symbol::non_terminal(i); - let first_set = &mut result - .first_sets - .entry(symbol) - .or_insert(LookaheadSet::new()); + let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new()); processed_non_terminals.clear(); symbols_to_process.clear(); symbols_to_process.push(symbol); @@ -103,10 +100,7 @@ impl<'a> ParseItemSetBuilder<'a> { } // The LAST set is defined in a similar way to the FIRST set. 
- let last_set = &mut result - .last_sets - .entry(symbol) - .or_insert(LookaheadSet::new()); + let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new()); processed_non_terminals.clear(); symbols_to_process.clear(); symbols_to_process.push(symbol); @@ -148,7 +142,7 @@ impl<'a> ParseItemSetBuilder<'a> { // Again, rather than computing these additions recursively, we use an explicit // stack called `entries_to_process`. for i in 0..syntax_grammar.variables.len() { - let empty_lookaheads = LookaheadSet::new(); + let empty_lookaheads = TokenSet::new(); let mut entries_to_process = vec![(i, &empty_lookaheads, true)]; // First, build up a map whose keys are all of the non-terminals that can @@ -160,7 +154,7 @@ impl<'a> ParseItemSetBuilder<'a> { let existing_info = follow_set_info_by_non_terminal .entry(variable_index) .or_insert_with(|| FollowSetInfo { - lookaheads: LookaheadSet::new(), + lookaheads: TokenSet::new(), propagates_lookaheads: false, }); @@ -269,15 +263,15 @@ impl<'a> ParseItemSetBuilder<'a> { result } - pub fn first_set(&self, symbol: &Symbol) -> &LookaheadSet { + pub fn first_set(&self, symbol: &Symbol) -> &TokenSet { &self.first_sets[symbol] } - pub fn last_set(&self, symbol: &Symbol) -> &LookaheadSet { + pub fn last_set(&self, symbol: &Symbol) -> &TokenSet { &self.first_sets[symbol] } - fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &LookaheadSet) { + fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) { if let Some(step) = item.step() { if step.symbol.is_non_terminal() { let next_step = item.successor().step(); @@ -294,7 +288,7 @@ impl<'a> ParseItemSetBuilder<'a> { let lookaheads = set .entries .entry(addition.item) - .or_insert_with(|| LookaheadSet::new()); + .or_insert_with(|| TokenSet::new()); lookaheads.insert_all(&addition.info.lookaheads); if addition.info.propagates_lookaheads { lookaheads.insert_all(following_tokens); diff --git 
a/cli/src/build_tables/minimize_parse_table.rs b/cli/src/build_tables/minimize_parse_table.rs index 573bf974..d83e117f 100644 --- a/cli/src/build_tables/minimize_parse_table.rs +++ b/cli/src/build_tables/minimize_parse_table.rs @@ -1,4 +1,4 @@ -use super::item::LookaheadSet; +use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{SyntaxGrammar, VariableType}; use crate::rules::{AliasMap, Symbol}; @@ -10,7 +10,7 @@ pub(crate) fn minimize_parse_table( syntax_grammar: &SyntaxGrammar, simple_aliases: &AliasMap, token_conflict_map: &TokenConflictMap, - keywords: &LookaheadSet, + keywords: &TokenSet, ) { let mut minimizer = Minimizer { parse_table, @@ -28,7 +28,7 @@ struct Minimizer<'a> { parse_table: &'a mut ParseTable, syntax_grammar: &'a SyntaxGrammar, token_conflict_map: &'a TokenConflictMap<'a>, - keywords: &'a LookaheadSet, + keywords: &'a TokenSet, simple_aliases: &'a AliasMap, } diff --git a/cli/src/build_tables/mod.rs b/cli/src/build_tables/mod.rs index 04b750e3..c632aa7b 100644 --- a/cli/src/build_tables/mod.rs +++ b/cli/src/build_tables/mod.rs @@ -9,7 +9,7 @@ mod token_conflicts; use self::build_lex_table::build_lex_table; use self::build_parse_table::build_parse_table; use self::coincident_tokens::CoincidentTokenIndex; -use self::item::LookaheadSet; +use self::item::TokenSet; use self::minimize_parse_table::minimize_parse_table; use self::token_conflicts::TokenConflictMap; use crate::error::Result; @@ -44,11 +44,7 @@ pub(crate) fn build_tables( &coincident_token_index, &token_conflict_map, ); - mark_fragile_tokens( - &mut parse_table, - lexical_grammar, - &token_conflict_map, - ); + mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map); if minimize { minimize_parse_table( &mut parse_table, @@ -85,22 +81,25 @@ fn populate_error_state( // First identify the *conflict-free tokens*: tokens that do not overlap with // any other token in any way. 
- let conflict_free_tokens = LookaheadSet::with((0..n).into_iter().filter_map(|i| { - let conflicts_with_other_tokens = (0..n).into_iter().any(|j| { - j != i - && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) - && token_conflict_map.does_conflict(i, j) - }); - if conflicts_with_other_tokens { - None - } else { - info!( - "error recovery - token {} has no conflicts", - lexical_grammar.variables[i].name - ); - Some(Symbol::terminal(i)) - } - })); + let conflict_free_tokens: TokenSet = (0..n) + .into_iter() + .filter_map(|i| { + let conflicts_with_other_tokens = (0..n).into_iter().any(|j| { + j != i + && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) + && token_conflict_map.does_conflict(i, j) + }); + if conflicts_with_other_tokens { + None + } else { + info!( + "error recovery - token {} has no conflicts", + lexical_grammar.variables[i].name + ); + Some(Symbol::terminal(i)) + } + }) + .collect(); let recover_entry = ParseTableEntry { reusable: false, @@ -153,9 +152,9 @@ fn identify_keywords( word_token: Option, token_conflict_map: &TokenConflictMap, coincident_token_index: &CoincidentTokenIndex, -) -> LookaheadSet { +) -> TokenSet { if word_token.is_none() { - return LookaheadSet::new(); + return TokenSet::new(); } let word_token = word_token.unwrap(); @@ -163,8 +162,11 @@ fn identify_keywords( // First find all of the candidate keyword tokens: tokens that start with // letters or underscore and can match the same string as a word token. 
- let keywords = LookaheadSet::with(lexical_grammar.variables.iter().enumerate().filter_map( - |(i, variable)| { + let keywords: TokenSet = lexical_grammar + .variables + .iter() + .enumerate() + .filter_map(|(i, variable)| { cursor.reset(vec![variable.start_state]); if all_chars_are_alphabetical(&cursor) && token_conflict_map.does_match_same_string(i, word_token.index) @@ -177,69 +179,75 @@ fn identify_keywords( } else { None } - }, - )); + }) + .collect(); // Exclude keyword candidates that shadow another keyword candidate. - let keywords = LookaheadSet::with(keywords.iter().filter(|token| { - for other_token in keywords.iter() { - if other_token != *token - && token_conflict_map.does_match_same_string(token.index, other_token.index) - { - info!( - "Keywords - exclude {} because it matches the same string as {}", - lexical_grammar.variables[token.index].name, - lexical_grammar.variables[other_token.index].name - ); - return false; + let keywords: TokenSet = keywords + .iter() + .filter(|token| { + for other_token in keywords.iter() { + if other_token != *token + && token_conflict_map.does_match_same_string(token.index, other_token.index) + { + info!( + "Keywords - exclude {} because it matches the same string as {}", + lexical_grammar.variables[token.index].name, + lexical_grammar.variables[other_token.index].name + ); + return false; + } } - } - true - })); + true + }) + .collect(); // Exclude keyword candidates for which substituting the keyword capture // token would introduce new lexical conflicts with other tokens. 
- let keywords = LookaheadSet::with(keywords.iter().filter(|token| { - for other_index in 0..lexical_grammar.variables.len() { - if keywords.contains(&Symbol::terminal(other_index)) { - continue; + let keywords = keywords + .iter() + .filter(|token| { + for other_index in 0..lexical_grammar.variables.len() { + if keywords.contains(&Symbol::terminal(other_index)) { + continue; + } + + // If the word token was already valid in every state containing + // this keyword candidate, then substituting the word token won't + // introduce any new lexical conflicts. + if coincident_token_index + .states_with(*token, Symbol::terminal(other_index)) + .iter() + .all(|state_id| { + parse_table.states[*state_id] + .terminal_entries + .contains_key(&word_token) + }) + { + continue; + } + + if !token_conflict_map.has_same_conflict_status( + token.index, + word_token.index, + other_index, + ) { + info!( + "Keywords - exclude {} because of conflict with {}", + lexical_grammar.variables[token.index].name, + lexical_grammar.variables[other_index].name + ); + return false; + } } - // If the word token was already valid in every state containing - // this keyword candidate, then substituting the word token won't - // introduce any new lexical conflicts. 
- if coincident_token_index - .states_with(*token, Symbol::terminal(other_index)) - .iter() - .all(|state_id| { - parse_table.states[*state_id] - .terminal_entries - .contains_key(&word_token) - }) - { - continue; - } - - if !token_conflict_map.has_same_conflict_status( - token.index, - word_token.index, - other_index, - ) { - info!( - "Keywords - exclude {} because of conflict with {}", - lexical_grammar.variables[token.index].name, - lexical_grammar.variables[other_index].name - ); - return false; - } - } - - info!( - "Keywords - include {}", - lexical_grammar.variables[token.index].name, - ); - true - })); + info!( + "Keywords - include {}", + lexical_grammar.variables[token.index].name, + ); + true + }) + .collect(); keywords } diff --git a/cli/src/build_tables/token_conflicts.rs b/cli/src/build_tables/token_conflicts.rs index cb2b6efe..7bb443a5 100644 --- a/cli/src/build_tables/token_conflicts.rs +++ b/cli/src/build_tables/token_conflicts.rs @@ -1,4 +1,4 @@ -use crate::build_tables::item::LookaheadSet; +use crate::build_tables::item::TokenSet; use crate::grammars::LexicalGrammar; use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use hashbrown::HashSet; @@ -22,7 +22,7 @@ pub(crate) struct TokenConflictMap<'a> { } impl<'a> TokenConflictMap<'a> { - pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec) -> Self { + pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec) -> Self { let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); let starting_chars = get_starting_chars(&mut cursor, grammar); let following_chars = get_following_chars(&starting_chars, following_tokens); @@ -141,7 +141,7 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec, - following_tokens: Vec, + following_tokens: Vec, ) -> Vec { following_tokens .into_iter() @@ -352,9 +352,15 @@ mod tests { let token_map = TokenConflictMap::new( &grammar, vec![ - LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()), - 
LookaheadSet::with([Symbol::terminal(var("in"))].iter().cloned()), - LookaheadSet::with([Symbol::terminal(var("identifier"))].iter().cloned()), + [Symbol::terminal(var("identifier"))] + .iter() + .cloned() + .collect(), + [Symbol::terminal(var("in"))].iter().cloned().collect(), + [Symbol::terminal(var("identifier"))] + .iter() + .cloned() + .collect(), ], ); From 4e29fe69df7b8ee1d1fb032f8662082b0d9da872 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 09:59:04 -0800 Subject: [PATCH 105/208] Reduce lex table size by merging compatible entry point states --- cli/src/build_tables/build_lex_table.rs | 65 +++++++++++++++++++++++-- cli/src/build_tables/item.rs | 17 +++++++ cli/src/build_tables/mod.rs | 2 + 3 files changed, 80 insertions(+), 4 deletions(-) diff --git a/cli/src/build_tables/build_lex_table.rs b/cli/src/build_tables/build_lex_table.rs index 9fc8edc6..0f828f5c 100644 --- a/cli/src/build_tables/build_lex_table.rs +++ b/cli/src/build_tables/build_lex_table.rs @@ -1,9 +1,10 @@ +use super::coincident_tokens::CoincidentTokenIndex; use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; use crate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use crate::rules::Symbol; -use crate::tables::{AdvanceAction, LexState, LexTable, ParseTable}; +use crate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, VecDeque}; @@ -12,6 +13,8 @@ pub(crate) fn build_lex_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, keywords: &TokenSet, + coincident_token_index: &CoincidentTokenIndex, + token_conflict_map: &TokenConflictMap, minimize: bool, ) -> (LexTable, LexTable) { let keyword_lex_table; @@ -23,8 +26,8 @@ pub(crate) fn build_lex_table( keyword_lex_table = LexTable::default(); } - let mut builder = LexTableBuilder::new(lexical_grammar); - for state in 
parse_table.states.iter_mut() { + let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec)> = Vec::new(); + for (i, state) in parse_table.states.iter().enumerate() { let tokens = state .terminal_entries .keys() @@ -42,7 +45,33 @@ pub(crate) fn build_lex_table( } }) .collect(); - state.lex_state_id = builder.add_state_for_tokens(&tokens); + + let mut did_merge = false; + for entry in parse_state_ids_by_token_set.iter_mut() { + if merge_token_set( + &mut entry.0, + &tokens, + lexical_grammar, + token_conflict_map, + coincident_token_index, + ) { + did_merge = true; + entry.1.push(i); + break; + } + } + + if !did_merge { + parse_state_ids_by_token_set.push((tokens, vec![i])); + } + } + + let mut builder = LexTableBuilder::new(lexical_grammar); + for (tokens, parse_state_ids) in parse_state_ids_by_token_set { + let lex_state_id = builder.add_state_for_tokens(&tokens); + for id in parse_state_ids { + parse_table.states[id].lex_state_id = lex_state_id; + } } let mut table = builder.table; @@ -215,6 +244,34 @@ impl<'a> LexTableBuilder<'a> { } } +fn merge_token_set( + tokens: &mut TokenSet, + other: &TokenSet, + lexical_grammar: &LexicalGrammar, + token_conflict_map: &TokenConflictMap, + coincident_token_index: &CoincidentTokenIndex, +) -> bool { + for i in 0..lexical_grammar.variables.len() { + let symbol = Symbol::terminal(i); + let set_without_terminal = match (tokens.contains_terminal(i), other.contains_terminal(i)) { + (true, false) => other, + (false, true) => tokens, + _ => continue, + }; + + for existing_token in set_without_terminal.terminals() { + if token_conflict_map.does_conflict(i, existing_token.index) + || !coincident_token_index.contains(symbol, existing_token) + { + return false; + } + } + } + + tokens.insert_all(other); + true +} + fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let mut state_replacements = BTreeMap::new(); let mut done = false; diff --git a/cli/src/build_tables/item.rs b/cli/src/build_tables/item.rs index 
5d6edc2f..2be331b0 100644 --- a/cli/src/build_tables/item.rs +++ b/cli/src/build_tables/item.rs @@ -93,6 +93,19 @@ impl TokenSet { .chain(if self.eof { Some(Symbol::end()) } else { None }) } + pub fn terminals<'a>(&'a self) -> impl Iterator + 'a { + self.terminal_bits + .iter() + .enumerate() + .filter_map(|(i, value)| { + if value { + Some(Symbol::terminal(i)) + } else { + None + } + }) + } + pub fn contains(&self, symbol: &Symbol) -> bool { match symbol.kind { SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), @@ -102,6 +115,10 @@ impl TokenSet { } } + pub fn contains_terminal(&self, index: usize) -> bool { + self.terminal_bits.get(index).unwrap_or(false) + } + pub fn insert(&mut self, other: Symbol) { let vec = match other.kind { SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), diff --git a/cli/src/build_tables/mod.rs b/cli/src/build_tables/mod.rs index c632aa7b..1f9acc14 100644 --- a/cli/src/build_tables/mod.rs +++ b/cli/src/build_tables/mod.rs @@ -59,6 +59,8 @@ pub(crate) fn build_tables( syntax_grammar, lexical_grammar, &keywords, + &coincident_token_index, + &token_conflict_map, minimize, ); Ok(( From f059557a9df750340eb87ca087c1df5d3b0fbd11 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 10:23:01 -0800 Subject: [PATCH 106/208] Move parser generation code in to 'generate' module within CLI crate --- cli/src/generate.rs | 34 -------- .../build_tables/build_lex_table.rs | 8 +- .../build_tables/build_parse_table.rs | 6 +- .../build_tables/coincident_tokens.rs | 6 +- cli/src/{ => generate}/build_tables/item.rs | 6 +- .../build_tables/item_set_builder.rs | 4 +- .../build_tables/minimize_parse_table.rs | 6 +- cli/src/{ => generate}/build_tables/mod.rs | 8 +- .../build_tables/token_conflicts.rs | 12 +-- cli/src/{js => generate}/dsl.js | 0 cli/src/{ => generate}/grammar-schema.json | 0 cli/src/{ => generate}/grammars.rs | 4 +- cli/src/generate/mod.rs | 79 +++++++++++++++++++ cli/src/{ => 
generate}/nfa.rs | 0 cli/src/{ => generate}/parse_grammar.rs | 66 ++++++++++------ .../prepare_grammar/expand_repeats.rs | 4 +- .../prepare_grammar/expand_tokens.rs | 10 +-- .../prepare_grammar/extract_simple_aliases.rs | 8 +- .../prepare_grammar/extract_tokens.rs | 6 +- .../prepare_grammar/flatten_grammar.rs | 8 +- .../prepare_grammar/intern_symbols.rs | 4 +- cli/src/{ => generate}/prepare_grammar/mod.rs | 4 +- .../prepare_grammar/process_inlines.rs | 6 +- cli/src/{render/mod.rs => generate/render.rs} | 8 +- cli/src/{ => generate}/rules.rs | 0 cli/src/{ => generate}/tables.rs | 4 +- cli/src/main.rs | 55 ++----------- script/check-mallocs | 2 +- 28 files changed, 187 insertions(+), 171 deletions(-) delete mode 100644 cli/src/generate.rs rename cli/src/{ => generate}/build_tables/build_lex_table.rs (97%) rename cli/src/{ => generate}/build_tables/build_parse_table.rs (99%) rename cli/src/{ => generate}/build_tables/coincident_tokens.rs (93%) rename cli/src/{ => generate}/build_tables/item.rs (98%) rename cli/src/{ => generate}/build_tables/item_set_builder.rs (99%) rename cli/src/{ => generate}/build_tables/minimize_parse_table.rs (98%) rename cli/src/{ => generate}/build_tables/mod.rs (97%) rename cli/src/{ => generate}/build_tables/token_conflicts.rs (97%) rename cli/src/{js => generate}/dsl.js (100%) rename cli/src/{ => generate}/grammar-schema.json (100%) rename cli/src/{ => generate}/grammars.rs (98%) create mode 100644 cli/src/generate/mod.rs rename cli/src/{ => generate}/nfa.rs (100%) rename cli/src/{ => generate}/parse_grammar.rs (73%) rename cli/src/{ => generate}/prepare_grammar/expand_repeats.rs (98%) rename cli/src/{ => generate}/prepare_grammar/expand_tokens.rs (98%) rename cli/src/{ => generate}/prepare_grammar/extract_simple_aliases.rs (96%) rename cli/src/{ => generate}/prepare_grammar/extract_tokens.rs (98%) rename cli/src/{ => generate}/prepare_grammar/flatten_grammar.rs (97%) rename cli/src/{ => generate}/prepare_grammar/intern_symbols.rs (98%) 
rename cli/src/{ => generate}/prepare_grammar/mod.rs (95%) rename cli/src/{ => generate}/prepare_grammar/process_inlines.rs (98%) rename cli/src/{render/mod.rs => generate/render.rs} (99%) rename cli/src/{ => generate}/rules.rs (100%) rename cli/src/{ => generate}/tables.rs (97%) diff --git a/cli/src/generate.rs b/cli/src/generate.rs deleted file mode 100644 index aa8f3b5b..00000000 --- a/cli/src/generate.rs +++ /dev/null @@ -1,34 +0,0 @@ -use crate::build_tables::build_tables; -use crate::error::Result; -use crate::parse_grammar::parse_grammar; -use crate::prepare_grammar::prepare_grammar; -use crate::render::render_c_code; - -pub fn generate_parser_for_grammar( - input: &str, - minimize: bool, - state_ids_to_log: Vec, -) -> Result { - let input_grammar = parse_grammar(input)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(&input_grammar)?; - let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - &inlines, - minimize, - state_ids_to_log, - )?; - let c_code = render_c_code( - &input_grammar.name, - parse_table, - main_lex_table, - keyword_lex_table, - keyword_capture_token, - syntax_grammar, - lexical_grammar, - simple_aliases, - ); - Ok(c_code) -} diff --git a/cli/src/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs similarity index 97% rename from cli/src/build_tables/build_lex_table.rs rename to cli/src/generate/build_tables/build_lex_table.rs index 0f828f5c..200c6959 100644 --- a/cli/src/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -1,10 +1,10 @@ use super::coincident_tokens::CoincidentTokenIndex; use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; -use crate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; -use crate::rules::Symbol; -use crate::tables::{AdvanceAction, 
LexState, LexTable, ParseStateId, ParseTable}; +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; +use crate::generate::rules::Symbol; +use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, VecDeque}; diff --git a/cli/src/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs similarity index 99% rename from cli/src/build_tables/build_parse_table.rs rename to cli/src/generate/build_tables/build_parse_table.rs index 27baf146..73c9c0e2 100644 --- a/cli/src/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -1,9 +1,9 @@ use super::item::{ParseItem, ParseItemSet, TokenSet}; use super::item_set_builder::ParseItemSetBuilder; use crate::error::{Error, Result}; -use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; -use crate::rules::{Alias, Associativity, Symbol, SymbolType}; -use crate::tables::{ +use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType}; +use crate::generate::tables::{ AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; diff --git a/cli/src/build_tables/coincident_tokens.rs b/cli/src/generate/build_tables/coincident_tokens.rs similarity index 93% rename from cli/src/build_tables/coincident_tokens.rs rename to cli/src/generate/build_tables/coincident_tokens.rs index 62295073..25dbc331 100644 --- a/cli/src/build_tables/coincident_tokens.rs +++ b/cli/src/generate/build_tables/coincident_tokens.rs @@ -1,6 +1,6 @@ -use crate::grammars::LexicalGrammar; -use crate::rules::Symbol; -use crate::tables::{ParseStateId, ParseTable}; +use crate::generate::grammars::LexicalGrammar; +use 
crate::generate::rules::Symbol; +use crate::generate::tables::{ParseStateId, ParseTable}; use std::fmt; pub(crate) struct CoincidentTokenIndex<'a> { diff --git a/cli/src/build_tables/item.rs b/cli/src/generate/build_tables/item.rs similarity index 98% rename from cli/src/build_tables/item.rs rename to cli/src/generate/build_tables/item.rs index 2be331b0..81c86f4a 100644 --- a/cli/src/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,6 +1,6 @@ -use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; -use crate::rules::Associativity; -use crate::rules::{Symbol, SymbolType}; +use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; +use crate::generate::rules::Associativity; +use crate::generate::rules::{Symbol, SymbolType}; use smallbitvec::SmallBitVec; use std::cmp::Ordering; use std::collections::BTreeMap; diff --git a/cli/src/build_tables/item_set_builder.rs b/cli/src/generate/build_tables/item_set_builder.rs similarity index 99% rename from cli/src/build_tables/item_set_builder.rs rename to cli/src/generate/build_tables/item_set_builder.rs index fea3b4d1..56d7c7c4 100644 --- a/cli/src/build_tables/item_set_builder.rs +++ b/cli/src/generate/build_tables/item_set_builder.rs @@ -1,6 +1,6 @@ use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet}; -use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::rules::Symbol; +use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::Symbol; use hashbrown::{HashMap, HashSet}; use std::fmt; diff --git a/cli/src/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs similarity index 98% rename from cli/src/build_tables/minimize_parse_table.rs rename to cli/src/generate/build_tables/minimize_parse_table.rs index d83e117f..007c9703 100644 --- a/cli/src/build_tables/minimize_parse_table.rs +++ 
b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -1,8 +1,8 @@ use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; -use crate::grammars::{SyntaxGrammar, VariableType}; -use crate::rules::{AliasMap, Symbol}; -use crate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; +use crate::generate::grammars::{SyntaxGrammar, VariableType}; +use crate::generate::rules::{AliasMap, Symbol}; +use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use hashbrown::{HashMap, HashSet}; pub(crate) fn minimize_parse_table( diff --git a/cli/src/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs similarity index 97% rename from cli/src/build_tables/mod.rs rename to cli/src/generate/build_tables/mod.rs index 1f9acc14..7d55d0fa 100644 --- a/cli/src/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -13,10 +13,10 @@ use self::item::TokenSet; use self::minimize_parse_table::minimize_parse_table; use self::token_conflicts::TokenConflictMap; use crate::error::Result; -use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::nfa::{CharacterSet, NfaCursor}; -use crate::rules::{AliasMap, Symbol}; -use crate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; +use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; +use crate::generate::nfa::{CharacterSet, NfaCursor}; +use crate::generate::rules::{AliasMap, Symbol}; +use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, diff --git a/cli/src/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs similarity index 97% rename from cli/src/build_tables/token_conflicts.rs rename to cli/src/generate/build_tables/token_conflicts.rs index 7bb443a5..1a63bfc8 100644 --- a/cli/src/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -1,6 +1,6 
@@ -use crate::build_tables::item::TokenSet; -use crate::grammars::LexicalGrammar; -use crate::nfa::{CharacterSet, NfaCursor, NfaTransition}; +use crate::generate::build_tables::item::TokenSet; +use crate::generate::grammars::LexicalGrammar; +use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use hashbrown::HashSet; use std::cmp::Ordering; use std::fmt; @@ -288,9 +288,9 @@ fn variable_ids_for_states<'a>( #[cfg(test)] mod tests { use super::*; - use crate::grammars::{Variable, VariableType}; - use crate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; - use crate::rules::{Rule, Symbol}; + use crate::generate::grammars::{Variable, VariableType}; + use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; + use crate::generate::rules::{Rule, Symbol}; #[test] fn test_starting_characters() { diff --git a/cli/src/js/dsl.js b/cli/src/generate/dsl.js similarity index 100% rename from cli/src/js/dsl.js rename to cli/src/generate/dsl.js diff --git a/cli/src/grammar-schema.json b/cli/src/generate/grammar-schema.json similarity index 100% rename from cli/src/grammar-schema.json rename to cli/src/generate/grammar-schema.json diff --git a/cli/src/grammars.rs b/cli/src/generate/grammars.rs similarity index 98% rename from cli/src/grammars.rs rename to cli/src/generate/grammars.rs index f82d6b02..3772bfd4 100644 --- a/cli/src/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -1,5 +1,5 @@ -use crate::nfa::Nfa; -use crate::rules::{Alias, Associativity, Rule, Symbol}; +use super::nfa::Nfa; +use super::rules::{Alias, Associativity, Rule, Symbol}; use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq)] diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs new file mode 100644 index 00000000..7dfe5a4b --- /dev/null +++ b/cli/src/generate/mod.rs @@ -0,0 +1,79 @@ +use self::build_tables::build_tables; +use self::parse_grammar::parse_grammar; +use self::prepare_grammar::prepare_grammar; +use 
self::render::render_c_code; +use crate::error::Result; +use std::io::Write; +use std::path::PathBuf; +use std::process::{Command, Stdio}; + +mod build_tables; +mod grammars; +mod nfa; +mod parse_grammar; +mod prepare_grammar; +mod render; +mod rules; +mod tables; + +pub fn generate_parser_for_grammar( + grammar_path: &PathBuf, + minimize: bool, + state_ids_to_log: Vec, +) -> Result { + let grammar_json = load_js_grammar_file(grammar_path); + let input_grammar = parse_grammar(&grammar_json)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(&input_grammar)?; + let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &inlines, + minimize, + state_ids_to_log, + )?; + let c_code = render_c_code( + &input_grammar.name, + parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + syntax_grammar, + lexical_grammar, + simple_aliases, + ); + Ok(c_code) +} + +fn load_js_grammar_file(grammar_path: &PathBuf) -> String { + let mut node_process = Command::new("node") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .expect("Failed to run `node`"); + + let js_prelude = include_str!("./dsl.js"); + let mut node_stdin = node_process + .stdin + .take() + .expect("Failed to open stdin for node"); + write!( + node_stdin, + "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n", + js_prelude, + grammar_path.to_str().unwrap() + ) + .expect("Failed to write to node's stdin"); + drop(node_stdin); + let output = node_process + .wait_with_output() + .expect("Failed to read output from node"); + match output.status.code() { + None => panic!("Node process was killed"), + Some(0) => {} + Some(code) => panic!(format!("Node process exited with status {}", code)), + } + + String::from_utf8(output.stdout).expect("Got invalid UTF8 from node") +} diff --git a/cli/src/nfa.rs b/cli/src/generate/nfa.rs similarity index 100% rename 
from cli/src/nfa.rs rename to cli/src/generate/nfa.rs diff --git a/cli/src/parse_grammar.rs b/cli/src/generate/parse_grammar.rs similarity index 73% rename from cli/src/parse_grammar.rs rename to cli/src/generate/parse_grammar.rs index 6808f402..e77dce9b 100644 --- a/cli/src/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -1,7 +1,7 @@ -use serde_json::{Map, Value}; +use super::grammars::{InputGrammar, Variable, VariableType}; +use super::rules::Rule; use crate::error::Result; -use crate::grammars::{InputGrammar, Variable, VariableType}; -use crate::rules::Rule; +use serde_json::{Map, Value}; #[derive(Deserialize)] #[serde(tag = "type")] @@ -81,20 +81,20 @@ pub(crate) fn parse_grammar(input: &str) -> Result { }) } - let extra_tokens = grammar_json.extras + let extra_tokens = grammar_json + .extras .unwrap_or(Vec::new()) .into_iter() .map(parse_rule) .collect(); - let external_tokens = grammar_json.externals + let external_tokens = grammar_json + .externals .unwrap_or(Vec::new()) .into_iter() .map(parse_rule) .collect(); - let expected_conflicts = grammar_json.conflicts - .unwrap_or(Vec::new()); - let variables_to_inline = grammar_json.inline - .unwrap_or(Vec::new()); + let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new()); + let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new()); Ok(InputGrammar { name: grammar_json.name, @@ -109,7 +109,11 @@ pub(crate) fn parse_grammar(input: &str) -> Result { fn parse_rule(json: RuleJSON) -> Rule { match json { - RuleJSON::ALIAS { content, value, named } => Rule::alias(parse_rule(*content), value, named), + RuleJSON::ALIAS { + content, + value, + named, + } => Rule::alias(parse_rule(*content), value, named), RuleJSON::BLANK => Rule::Blank, RuleJSON::STRING { value } => Rule::String(value), RuleJSON::PATTERN { value } => Rule::Pattern(value), @@ -117,11 +121,15 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::CHOICE { members } => 
Rule::choice(members.into_iter().map(parse_rule).collect()), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), - RuleJSON::REPEAT { content } => Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]), + RuleJSON::REPEAT { content } => { + Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]) + } RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)), RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)), RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)), - RuleJSON::PREC_DYNAMIC { value, content } => Rule::prec_dynamic(value, parse_rule(*content)), + RuleJSON::PREC_DYNAMIC { value, content } => { + Rule::prec_dynamic(value, parse_rule(*content)) + } RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)), RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)), } @@ -133,7 +141,8 @@ mod tests { #[test] fn test_parse_grammar() { - let grammar = parse_grammar(r#"{ + let grammar = parse_grammar( + r#"{ "name": "my_lang", "rules": { "file": { @@ -148,20 +157,25 @@ mod tests { "value": "foo" } } - }"#).unwrap(); + }"#, + ) + .unwrap(); assert_eq!(grammar.name, "my_lang"); - assert_eq!(grammar.variables, vec![ - Variable { - name: "file".to_string(), - kind: VariableType::Named, - rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())) - }, - Variable { - name: "statement".to_string(), - kind: VariableType::Named, - rule: Rule::String("foo".to_string()) - }, - ]); + assert_eq!( + grammar.variables, + vec![ + Variable { + name: "file".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())) + }, + Variable { + name: "statement".to_string(), + kind: VariableType::Named, + rule: Rule::String("foo".to_string()) + }, + ] + ); } } diff --git 
a/cli/src/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs similarity index 98% rename from cli/src/prepare_grammar/expand_repeats.rs rename to cli/src/generate/prepare_grammar/expand_repeats.rs index 4589bd11..b290799b 100644 --- a/cli/src/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -1,6 +1,6 @@ use super::ExtractedSyntaxGrammar; -use crate::grammars::{Variable, VariableType}; -use crate::rules::{Rule, Symbol}; +use crate::generate::grammars::{Variable, VariableType}; +use crate::generate::rules::{Rule, Symbol}; use hashbrown::HashMap; use std::mem; diff --git a/cli/src/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs similarity index 98% rename from cli/src/prepare_grammar/expand_tokens.rs rename to cli/src/generate/prepare_grammar/expand_tokens.rs index 2678df19..d1922dc0 100644 --- a/cli/src/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -1,8 +1,8 @@ use super::ExtractedLexicalGrammar; use crate::error::{Error, Result}; -use crate::grammars::{LexicalGrammar, LexicalVariable}; -use crate::nfa::{CharacterSet, Nfa, NfaState}; -use crate::rules::Rule; +use crate::generate::grammars::{LexicalGrammar, LexicalVariable}; +use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; +use crate::generate::rules::Rule; use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, }; @@ -366,8 +366,8 @@ impl NfaBuilder { #[cfg(test)] mod tests { use super::*; - use crate::grammars::Variable; - use crate::nfa::{NfaCursor, NfaTransition}; + use crate::generate::grammars::Variable; + use crate::generate::nfa::{NfaCursor, NfaTransition}; fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> { let start_states = grammar.variables.iter().map(|v| v.start_state).collect(); diff --git 
a/cli/src/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs similarity index 96% rename from cli/src/prepare_grammar/extract_simple_aliases.rs rename to cli/src/generate/prepare_grammar/extract_simple_aliases.rs index aa8b3f77..84c535b9 100644 --- a/cli/src/prepare_grammar/extract_simple_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs @@ -1,5 +1,5 @@ -use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; -use crate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; #[derive(Clone, Default)] struct SymbolStatus { @@ -83,8 +83,8 @@ pub(super) fn extract_simple_aliases( #[cfg(test)] mod tests { use super::*; - use crate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep}; - use crate::nfa::Nfa; + use crate::generate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep}; + use crate::generate::nfa::Nfa; #[test] fn test_extract_simple_aliases() { diff --git a/cli/src/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs similarity index 98% rename from cli/src/prepare_grammar/extract_tokens.rs rename to cli/src/generate/prepare_grammar/extract_tokens.rs index 5a54d34e..ae07763b 100644 --- a/cli/src/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -1,7 +1,7 @@ use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar}; use crate::error::{Error, Result}; -use crate::grammars::{ExternalToken, Variable, VariableType}; -use crate::rules::{MetadataParams, Rule, Symbol, SymbolType}; +use crate::generate::grammars::{ExternalToken, Variable, VariableType}; +use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType}; use hashbrown::HashMap; use std::mem; @@ -311,7 +311,7 @@ impl SymbolReplacer { 
#[cfg(test)] mod test { use super::*; - use crate::grammars::VariableType; + use crate::generate::grammars::VariableType; #[test] fn test_extraction() { diff --git a/cli/src/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs similarity index 97% rename from cli/src/prepare_grammar/flatten_grammar.rs rename to cli/src/generate/prepare_grammar/flatten_grammar.rs index 3ffef086..9409a010 100644 --- a/cli/src/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -1,7 +1,7 @@ use super::ExtractedSyntaxGrammar; use crate::error::Result; -use crate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable}; -use crate::rules::{Alias, Associativity, Rule}; +use crate::generate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable}; +use crate::generate::rules::{Alias, Associativity, Rule}; struct RuleFlattener { production: Production, @@ -163,8 +163,8 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result Result { let interner = Interner { grammar }; diff --git a/cli/src/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs similarity index 95% rename from cli/src/prepare_grammar/mod.rs rename to cli/src/generate/prepare_grammar/mod.rs index b0c1d2a3..41f668f4 100644 --- a/cli/src/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -14,10 +14,10 @@ use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; use self::process_inlines::process_inlines; use crate::error::Result; -use crate::grammars::{ +use crate::generate::grammars::{ ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable, }; -use crate::rules::{AliasMap, Rule, Symbol}; +use crate::generate::rules::{AliasMap, Rule, Symbol}; pub(crate) struct IntermediateGrammar { variables: Vec, diff --git a/cli/src/prepare_grammar/process_inlines.rs 
b/cli/src/generate/prepare_grammar/process_inlines.rs similarity index 98% rename from cli/src/prepare_grammar/process_inlines.rs rename to cli/src/generate/prepare_grammar/process_inlines.rs index 557b0fa4..3c0f529a 100644 --- a/cli/src/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -1,4 +1,4 @@ -use crate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar}; +use crate::generate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar}; use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -184,8 +184,8 @@ pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { #[cfg(test)] mod tests { use super::*; - use crate::grammars::{ProductionStep, SyntaxVariable, VariableType}; - use crate::rules::{Associativity, Symbol}; + use crate::generate::grammars::{ProductionStep, SyntaxVariable, VariableType}; + use crate::generate::rules::{Associativity, Symbol}; #[test] fn test_basic_inlining() { diff --git a/cli/src/render/mod.rs b/cli/src/generate/render.rs similarity index 99% rename from cli/src/render/mod.rs rename to cli/src/generate/render.rs index 36429848..5e0d2b67 100644 --- a/cli/src/render/mod.rs +++ b/cli/src/generate/render.rs @@ -1,7 +1,7 @@ -use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; -use crate::nfa::CharacterSet; -use crate::rules::{Alias, AliasMap, Symbol, SymbolType}; -use crate::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; +use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; +use super::nfa::CharacterSet; +use super::rules::{Alias, AliasMap, Symbol, SymbolType}; +use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; use core::ops::Range; use hashbrown::{HashMap, HashSet}; use std::fmt::Write; diff --git a/cli/src/rules.rs b/cli/src/generate/rules.rs similarity index 
100% rename from cli/src/rules.rs rename to cli/src/generate/rules.rs diff --git a/cli/src/tables.rs b/cli/src/generate/tables.rs similarity index 97% rename from cli/src/tables.rs rename to cli/src/generate/tables.rs index edbbaaab..6c3da68e 100644 --- a/cli/src/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,5 +1,5 @@ -use crate::nfa::CharacterSet; -use crate::rules::{Alias, Associativity, Symbol}; +use super::nfa::CharacterSet; +use super::rules::{Alias, Associativity, Symbol}; use hashbrown::HashMap; pub(crate) type AliasSequenceId = usize; diff --git a/cli/src/main.rs b/cli/src/main.rs index 11c277c3..fe6ffd8c 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -7,24 +7,14 @@ extern crate serde_derive; extern crate hashbrown; extern crate serde_json; -use clap::{App, Arg, SubCommand}; -use std::env; -use std::io::Write; -use std::path::PathBuf; -use std::process::{exit, Command, Stdio}; -use std::usize; - -mod build_tables; mod error; mod generate; -mod grammars; mod logger; -mod nfa; -mod parse_grammar; -mod prepare_grammar; -mod render; -mod rules; -mod tables; + +use clap::{App, Arg, SubCommand}; +use std::env; +use std::process::exit; +use std::usize; fn main() { if let Err(e) = run() { @@ -77,43 +67,10 @@ fn run() -> error::Result<()> { }); let mut grammar_path = env::current_dir().expect("Failed to read CWD"); grammar_path.push("grammar.js"); - let grammar_json = load_js_grammar_file(grammar_path); let code = - generate::generate_parser_for_grammar(&grammar_json, minimize, state_ids_to_log)?; + generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?; println!("{}", code); } Ok(()) } - -fn load_js_grammar_file(grammar_path: PathBuf) -> String { - let mut node_process = Command::new("node") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn() - .expect("Failed to run `node`"); - - let js_prelude = include_str!("./js/dsl.js"); - let mut node_stdin = node_process - .stdin - .take() - .expect("Failed to open stdin for 
node"); - write!( - node_stdin, - "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n", - js_prelude, - grammar_path.to_str().unwrap() - ) - .expect("Failed to write to node's stdin"); - drop(node_stdin); - let output = node_process - .wait_with_output() - .expect("Failed to read output from node"); - match output.status.code() { - None => panic!("Node process was killed"), - Some(0) => {} - Some(code) => panic!(format!("Node process exited with status {}", code)), - } - - String::from_utf8(output.stdout).expect("Got invalid UTF8 from node") -} diff --git a/script/check-mallocs b/script/check-mallocs index 0bd064d0..889861d8 100755 --- a/script/check-mallocs +++ b/script/check-mallocs @@ -1,6 +1,6 @@ #!/usr/bin/env bash -src_dir="src/runtime" +src_dir="lib/src" allocation_functions=( malloc From 8291d294fb0b251addc745c90863e22792f5cc28 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 17:57:27 -0800 Subject: [PATCH 107/208] Add test subcommand Co-Authored-By: Timothy Clem --- Cargo.lock | 2 + cli/Cargo.toml | 2 + cli/src/error.rs | 14 +++ cli/src/loader.rs | 241 ++++++++++++++++++++++++++++++++++++++++++++++ cli/src/main.rs | 34 +++++-- cli/src/test.rs | 212 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 496 insertions(+), 9 deletions(-) create mode 100644 cli/src/loader.rs create mode 100644 cli/src/test.rs diff --git a/Cargo.lock b/Cargo.lock index 758dcad7..7a249312 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -641,6 +641,7 @@ dependencies = [ name = "tree-sitter-cli" version = "0.1.0" dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", @@ -648,6 +649,7 @@ dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 
(registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 6a9c253d..200fd2f1 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -9,6 +9,7 @@ name = "tree-sitter" path = "src/main.rs" [dependencies] +ansi_term = "0.11" lazy_static = "1.2.0" smallbitvec = "2.3.0" clap = "2.32" @@ -20,6 +21,7 @@ rusqlite = "0.14.0" serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" +regex = "1" [dependencies.tree-sitter] path = "../lib" diff --git a/cli/src/error.rs b/cli/src/error.rs index 9a5801f8..1b8b1a79 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,3 +1,5 @@ +use std::io; + #[derive(Debug)] pub struct Error(pub String); @@ -22,3 +24,15 @@ impl From for Error { Error(error.to_string()) } } + +impl From for Error { + fn from(error: io::Error) -> Self { + Error(error.to_string()) + } +} + +impl From for Error { + fn from(error: String) -> Self { + Error(error) + } +} diff --git a/cli/src/loader.rs b/cli/src/loader.rs new file mode 100644 index 00000000..7dfb233b --- /dev/null +++ b/cli/src/loader.rs @@ -0,0 +1,241 @@ +use libloading::{Library, Symbol}; +use regex::{Regex, RegexBuilder}; +use std::collections::HashMap; +use std::fs; +use std::io; +use std::mem; +use std::path::{Path, PathBuf}; +use std::process::Command; +use tree_sitter::{Language, PropertySheet}; + +const PACKAGE_JSON_PATH: &'static str = "package.json"; +const PARSER_C_PATH: &'static str = "src/parser.c"; +const SCANNER_C_PATH: &'static str = "src/scanner.c"; +const SCANNER_CC_PATH: &'static str = "src/scanner.cc"; + +#[cfg(unix)] +const DYLIB_EXTENSION: &'static str = "so"; + 
+#[cfg(windows)] +const DYLIB_EXTENSION: &'static str = "dll"; + +struct LanguageRepo { + name: String, + path: PathBuf, + language: Option, + configurations: Vec, +} + +pub struct LanguageConfiguration { + name: String, + content_regex: Option, + first_line_regex: Option, + file_types: Vec, + highlight_property_sheet: Option>, +} + +pub struct Loader { + parser_lib_path: PathBuf, + language_repos: Vec, + language_configuration_indices_by_file_type: HashMap>, +} + +unsafe impl Send for Loader {} +unsafe impl Sync for Loader {} + +impl Loader { + pub fn new(parser_lib_path: PathBuf) -> Self { + Loader { + parser_lib_path, + language_repos: Vec::new(), + language_configuration_indices_by_file_type: HashMap::new(), + } + } + + pub fn find_parsers(&mut self, parser_src_paths: &Vec) -> io::Result<()> { + for parser_container_dir in parser_src_paths.iter() { + for entry in fs::read_dir(parser_container_dir)? { + let entry = entry?; + if let Some(parser_dir_name) = entry.file_name().to_str() { + if parser_dir_name.starts_with("tree-sitter-") { + if self.load_language_configurations( + &parser_container_dir.join(parser_dir_name), + ).is_err() { + eprintln!("Error loading {}", parser_dir_name); + } + } + } + } + } + Ok(()) + } + + pub fn language_configuration_at_path( + &mut self, + path: &Path, + ) -> io::Result> { + let repo_index = self.load_language_configurations(path)?; + self.load_language_from_repo(repo_index, 0) + } + + pub fn language_for_file_name( + &mut self, + path: &Path, + ) -> io::Result> { + let indices = path + .file_name() + .and_then(|n| n.to_str()) + .and_then(|file_name| { + self.language_configuration_indices_by_file_type + .get(file_name) + }) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| { + self.language_configuration_indices_by_file_type + .get(extension) + }) + }); + + if let Some(indices) = indices { + // TODO use `content-regex` to pick one + for (repo_index, conf_index) in indices { + 
return self.load_language_from_repo(*repo_index, *conf_index); + } + } + Ok(None) + } + + fn load_language_from_repo( + &mut self, + repo_index: usize, + conf_index: usize, + ) -> io::Result> { + let repo = &self.language_repos[repo_index]; + let language = if let Some(language) = repo.language { + language + } else { + let language = self.load_language_at_path(&repo.name, &repo.path)?; + self.language_repos[repo_index].language = Some(language); + language + }; + if let Some(configuration) = self.language_repos[repo_index] + .configurations + .get(conf_index) + { + Ok(Some((language, configuration))) + } else { + Ok(None) + } + } + + fn load_language_at_path(&self, name: &str, language_path: &Path) -> io::Result { + let parser_c_path = language_path.join(PARSER_C_PATH); + let mut library_path = self.parser_lib_path.join(name); + library_path.set_extension(DYLIB_EXTENSION); + + if !library_path.exists() || was_modified_more_recently(&parser_c_path, &library_path)? { + let compiler_name = std::env::var("CXX").unwrap_or("c++".to_owned()); + let mut command = Command::new(compiler_name); + command + .arg("-shared") + .arg("-fPIC") + .arg("-I") + .arg(language_path.join("src")) + .arg("-o") + .arg(&library_path) + .arg("-xc") + .arg(parser_c_path); + let scanner_c_path = language_path.join(SCANNER_C_PATH); + let scanner_cc_path = language_path.join(SCANNER_CC_PATH); + if scanner_c_path.exists() { + command.arg("-xc").arg(scanner_c_path); + } else if scanner_cc_path.exists() { + command.arg("-xc++").arg(scanner_cc_path); + } + command.output()?; + } + + let library = Library::new(library_path)?; + let language_fn_name = format!("tree_sitter_{}", name); + let language = unsafe { + let language_fn: Symbol Language> = + library.get(language_fn_name.as_bytes())?; + language_fn() + }; + mem::forget(library); + Ok(language) + } + + fn load_language_configurations<'a>(&'a mut self, parser_path: &Path) -> io::Result { + let name = parser_path + .file_name() + .unwrap() + 
.to_str() + .unwrap() + .split_at("tree-sitter-".len()) + .1; + + #[derive(Deserialize)] + struct LanguageConfigurationJSON { + name: String, + #[serde(rename = "file-types")] + file_types: Option>, + #[serde(rename = "content-regex")] + content_regex: Option, + #[serde(rename = "first-line-regex")] + first_line_regex: Option, + highlights: Option, + } + + #[derive(Deserialize)] + struct PackageJSON { + #[serde(rename = "tree-sitter")] + tree_sitter: Option>, + } + + let package_json_contents = fs::read_to_string(&parser_path.join(PACKAGE_JSON_PATH))?; + let package_json: PackageJSON = serde_json::from_str(&package_json_contents)?; + let configurations = package_json + .tree_sitter + .map_or(Vec::new(), |configurations| { + configurations + .into_iter() + .map(|conf| LanguageConfiguration { + name: conf.name, + file_types: conf.file_types.unwrap_or(Vec::new()), + content_regex: conf + .content_regex + .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), + first_line_regex: conf + .first_line_regex + .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), + highlight_property_sheet: conf.highlights.map(|d| Err(d.into())), + }) + .collect() + }); + + for (i, configuration) in configurations.iter().enumerate() { + for file_type in &configuration.file_types { + self.language_configuration_indices_by_file_type + .entry(file_type.to_string()) + .or_insert(Vec::new()) + .push((self.language_repos.len(), i)); + } + } + + self.language_repos.push(LanguageRepo { + name: name.to_string(), + path: parser_path.to_owned(), + language: None, + configurations, + }); + + Ok(self.language_repos.len() - 1) + } +} + +fn was_modified_more_recently(a: &Path, b: &Path) -> io::Result { + Ok(fs::metadata(a)?.modified()? > fs::metadata(b)?.modified()?) 
+} diff --git a/cli/src/main.rs b/cli/src/main.rs index fe6ffd8c..87f9e26d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -5,14 +5,20 @@ extern crate log; #[macro_use] extern crate serde_derive; extern crate hashbrown; +extern crate regex; extern crate serde_json; mod error; mod generate; +mod loader; mod logger; +mod parse; +mod test; +use self::loader::Loader; use clap::{App, Arg, SubCommand}; use std::env; +use std::path::Path; use std::process::exit; use std::usize; @@ -44,15 +50,13 @@ fn run() -> error::Result<()> { .about("Parse a file") .arg(Arg::with_name("path").index(1)), ) - .subcommand( - SubCommand::with_name("test") - .about("Run a parser's tests") - .arg(Arg::with_name("path").index(1).required(true)) - .arg(Arg::with_name("line").index(2).required(true)) - .arg(Arg::with_name("column").index(3).required(true)), - ) + .subcommand(SubCommand::with_name("test").about("Run a parser's tests")) .get_matches(); + let home_dir = dirs::home_dir().unwrap(); + let current_dir = env::current_dir().unwrap(); + let mut loader = Loader::new(home_dir.join(".tree-sitter")); + if let Some(matches) = matches.subcommand_matches("generate") { if matches.is_present("log") { logger::init(); @@ -65,11 +69,23 @@ fn run() -> error::Result<()> { ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) .collect() }); - let mut grammar_path = env::current_dir().expect("Failed to read CWD"); - grammar_path.push("grammar.js"); + let grammar_path = current_dir.join("grammar.js"); let code = generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?; println!("{}", code); + return Ok(()); + } + + if let Some(_matches) = matches.subcommand_matches("test") { + let corpus_path = current_dir.join("corpus"); + let home_dir = dirs::home_dir().unwrap(); + let mut loader = Loader::new(home_dir.join(".tree-sitter")); + if let Some((language, _)) = loader.language_configuration_at_path(¤t_dir)? 
{ + test::run_tests_at_path(language, &corpus_path)?; + } else { + eprintln!("No language found"); + } + } } Ok(()) diff --git a/cli/src/test.rs b/cli/src/test.rs new file mode 100644 index 00000000..7ef63bb7 --- /dev/null +++ b/cli/src/test.rs @@ -0,0 +1,212 @@ +use super::error::Result; +use ansi_term::Colour; +use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; +use regex::Regex; +use std::char; +use std::fs; +use std::io; +use std::path::Path; +use std::str; +use tree_sitter::{Language, Parser}; + +lazy_static! { + static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n") + .multi_line(true) + .build() + .unwrap(); + static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n") + .multi_line(true) + .build() + .unwrap(); + static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap(); +} + +#[derive(Debug, PartialEq, Eq)] +pub enum TestEntry { + Group { + name: String, + children: Vec, + }, + Example { + name: String, + input: Vec, + output: String, + }, +} + +pub fn run_tests_at_path(language: Language, path: &Path) -> Result<()> { + let test_entry = parse_tests(path)?; + let mut parser = Parser::new(); + parser.set_language(language)?; + + let mut failures = Vec::new(); + if let TestEntry::Group { children, .. 
} = test_entry { + for child in children { + run_tests(&mut parser, child, 0, &mut failures)?; + } + } + + if failures.len() > 0 { + println!(""); + + if failures.len() == 1 { + println!("1 failure:") + } else { + println!("{} failures:", failures.len()) + } + + for (name, actual, expected) in failures { + println!("\n {}:", name); + println!(" Expected: {}", expected); + println!(" Actual: {}", actual); + } + } + + Ok(()) +} + +fn run_tests( + parser: &mut Parser, + test_entry: TestEntry, + mut indent_level: i32, + failures: &mut Vec<(String, String, String)>, +) -> Result<()> { + for _ in 0..indent_level { + print!(" "); + } + match test_entry { + TestEntry::Example { + name, + input, + output, + } => { + let tree = parser + .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) + .unwrap(); + let actual = tree.root_node().to_sexp(); + if actual == output { + println!("✓ {}", Colour::Green.paint(&name)); + } else { + println!("✗ {}", Colour::Red.paint(&name)); + failures.push((name, actual, output)); + } + } + TestEntry::Group { name, children } => { + println!("{}:", name); + indent_level += 1; + for child in children { + run_tests(parser, child, indent_level, failures)?; + } + } + } + Ok(()) +} + +pub fn parse_tests(path: &Path) -> io::Result { + let name = path + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or("") + .to_string(); + if path.is_dir() { + let mut children = Vec::new(); + for entry in fs::read_dir(path)? 
{ + let entry = entry?; + children.push(parse_tests(&entry.path())?); + } + Ok(TestEntry::Group { name, children }) + } else { + let content = fs::read_to_string(path)?; + Ok(parse_test_content(name, content)) + } +} + +fn parse_test_content(name: String, content: String) -> TestEntry { + let mut children = Vec::new(); + let bytes = content.as_bytes(); + let mut previous_name = String::new(); + let mut previous_header_end = 0; + for header_match in HEADER_REGEX + .find_iter(&bytes) + .map(|m| (m.start(), m.end())) + .chain(Some((bytes.len(), bytes.len()))) + { + let (header_start, header_end) = header_match; + if previous_header_end > 0 { + if let Some(divider_match) = + DIVIDER_REGEX.find(&bytes[previous_header_end..header_start]) + { + let (divider_start, divider_end) = ( + previous_header_end + divider_match.start(), + previous_header_end + divider_match.end(), + ); + if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) { + let input = bytes[previous_header_end..divider_start].to_vec(); + let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); + children.push(TestEntry::Example { + name: previous_name, + input, + output, + }); + } + } + } + previous_name = String::from_utf8_lossy(&bytes[header_start..header_end]) + .trim_matches(|c| char::is_whitespace(c) || c == '=') + .to_string(); + previous_header_end = header_end; + } + TestEntry::Group { name, children } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_test_content() { + let entry = parse_test_content( + "the-filename".to_string(), + r#" +=============== +The first test +=============== + +a b c + +--- + +(a + (b c)) + +================ +The second test +================ +d +--- +(d) + "# + .trim() + .to_string(), + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + children: vec![ + TestEntry::Example { + name: "The first test".to_string(), + input: "\na b c\n\n".as_bytes().to_vec(), + output: "(a (b 
c))".to_string(), + }, + TestEntry::Example { + name: "The second test".to_string(), + input: "d\n".as_bytes().to_vec(), + output: "(d)".to_string(), + }, + ] + } + ); + } +} From 20fcffb393fdeac8f09ec42b7d6433dee68b4aaf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 17:57:36 -0800 Subject: [PATCH 108/208] Add parse subcommand Co-Authored-By: Timothy Clem --- cli/src/main.rs | 9 +++++++ cli/src/parse.rs | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 cli/src/parse.rs diff --git a/cli/src/main.rs b/cli/src/main.rs index 87f9e26d..6a0cf9ec 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -86,6 +86,15 @@ fn run() -> error::Result<()> { eprintln!("No language found"); } } + + if let Some(matches) = matches.subcommand_matches("parse") { + loader.find_parsers(&vec![home_dir.join("github")])?; + let source_path = Path::new(matches.value_of("path").unwrap()); + if let Some((language, _)) = loader.language_for_file_name(source_path)? 
{ + parse::parse_file_at_path(language, source_path)?; + } else { + eprintln!("No language found"); + } } Ok(()) diff --git a/cli/src/parse.rs b/cli/src/parse.rs new file mode 100644 index 00000000..26fe9b9a --- /dev/null +++ b/cli/src/parse.rs @@ -0,0 +1,69 @@ +use super::error::Result; +use std::fs; +use std::path::Path; +use tree_sitter::{Language, Parser}; +use std::io::{self, Write}; + +pub fn parse_file_at_path(language: Language, path: &Path) -> Result<()> { + let mut parser = Parser::new(); + parser.set_language(language)?; + let source_code = fs::read_to_string(path)?; + let tree = parser + .parse_str(&source_code, None) + .expect("Incompatible language version"); + + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + let mut cursor = tree.walk(); + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + stdout.write(b")")?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + let start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "({} [{}, {}] - [{}, {}]", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } + + println!(""); + Ok(()) +} From 6c4d00aad58c72015941ab86c042b3db1d3f7df9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Jan 2019 22:01:40 -0800 Subject: [PATCH 109/208] Print diffs when tests fail --- Cargo.lock | 7 +++++++ cli/Cargo.toml | 1 + cli/src/main.rs | 8 ++------ 
cli/src/test.rs | 29 +++++++++++++++++++++++++---- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a249312..db86e43b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -156,6 +156,11 @@ dependencies = [ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "difference" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "dirs" version = "1.0.4" @@ -643,6 +648,7 @@ version = "0.1.0" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -764,6 +770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2449aaa4ec7ef96e5fb24db16024b935df718e9ae1cec0a1e68feeca2efca7b8" "checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" "checksum crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c55913cc2799171a550e307918c0a360e8c16004820291bf3b638969b4a01816" +"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88972de891f6118092b643d85a0b28e0678e0f948d7f879aa32f2d5aafe97d2a" "checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" "checksum 
failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 200fd2f1..a2f546c4 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -10,6 +10,7 @@ path = "src/main.rs" [dependencies] ansi_term = "0.11" +difference = "2.0" lazy_static = "1.2.0" smallbitvec = "2.3.0" clap = "2.32" diff --git a/cli/src/main.rs b/cli/src/main.rs index 6a0cf9ec..626a729c 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -74,9 +74,7 @@ fn run() -> error::Result<()> { generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?; println!("{}", code); return Ok(()); - } - - if let Some(_matches) = matches.subcommand_matches("test") { + } else if let Some(_matches) = matches.subcommand_matches("test") { let corpus_path = current_dir.join("corpus"); let home_dir = dirs::home_dir().unwrap(); let mut loader = Loader::new(home_dir.join(".tree-sitter")); @@ -85,9 +83,7 @@ fn run() -> error::Result<()> { } else { eprintln!("No language found"); } - } - - if let Some(matches) = matches.subcommand_matches("parse") { + } else if let Some(matches) = matches.subcommand_matches("parse") { loader.find_parsers(&vec![home_dir.join("github")])?; let source_path = Path::new(matches.value_of("path").unwrap()); if let Some((language, _)) = loader.language_for_file_name(source_path)? 
{ diff --git a/cli/src/test.rs b/cli/src/test.rs index 7ef63bb7..a693576d 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,5 +1,6 @@ use super::error::Result; use ansi_term::Colour; +use difference::{Changeset, Difference}; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; @@ -55,10 +56,30 @@ pub fn run_tests_at_path(language: Language, path: &Path) -> Result<()> { println!("{} failures:", failures.len()) } - for (name, actual, expected) in failures { - println!("\n {}:", name); - println!(" Expected: {}", expected); - println!(" Actual: {}", actual); + println!( + "\n{} / {}", + Colour::Green.paint("expected"), + Colour::Red.paint("actual") + ); + + for (i, (name, actual, expected)) in failures.iter().enumerate() { + println!("\n {}. {}:", i + 1, name); + let changeset = Changeset::new(actual, expected, " "); + print!(" "); + for diff in &changeset.diffs { + match diff { + Difference::Same(part) => { + print!("{}{}", part, changeset.split); + } + Difference::Add(part) => { + print!("{}{}", Colour::Green.paint(part), changeset.split); + } + Difference::Rem(part) => { + print!("{}{}", Colour::Red.paint(part), changeset.split); + } + } + } + println!(""); } } From 98807d205317e0e5ef7512827657086608adcd35 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 8 Jan 2019 21:03:51 -0800 Subject: [PATCH 110/208] Add debug and debug-graph flags to parse and test commands --- cli/src/main.rs | 31 ++++++++++++++----- cli/src/parse.rs | 30 +++++++++++++++++-- cli/src/test.rs | 49 ++++++++++++++++++++++++++----- cli/src/util.rs | 36 +++++++++++++++++++++++ lib/binding/bindings.rs | 31 +++++++++++++------ lib/binding/ffi.rs | 5 ++++ lib/binding/lib.rs | 14 +++++++++ lib/include/tree_sitter/runtime.h | 2 +- lib/src/parser.c | 12 ++++++-- script/bindgen.sh | 4 +-- 10 files changed, 182 insertions(+), 32 deletions(-) create mode 100644 cli/src/util.rs diff --git a/cli/src/main.rs b/cli/src/main.rs index 
626a729c..8dbf345a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -14,6 +14,7 @@ mod loader; mod logger; mod parse; mod test; +mod util; use self::loader::Loader; use clap::{App, Arg, SubCommand}; @@ -48,9 +49,22 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("parse") .about("Parse a file") - .arg(Arg::with_name("path").index(1)), + .arg(Arg::with_name("path").index(1).required(true)) + .arg(Arg::with_name("debug").long("debug").short("d")) + .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), + ) + .subcommand( + SubCommand::with_name("test") + .about("Run a parser's tests") + .arg( + Arg::with_name("filter") + .long("filter") + .short("f") + .takes_value(true), + ) + .arg(Arg::with_name("debug").long("debug").short("d")) + .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) - .subcommand(SubCommand::with_name("test").about("Run a parser's tests")) .get_matches(); let home_dir = dirs::home_dir().unwrap(); @@ -74,20 +88,23 @@ fn run() -> error::Result<()> { generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?; println!("{}", code); return Ok(()); - } else if let Some(_matches) = matches.subcommand_matches("test") { + } else if let Some(matches) = matches.subcommand_matches("test") { + let debug = matches.is_present("debug"); + let debug_graph = matches.is_present("debug-graph"); + let filter = matches.value_of("filter"); let corpus_path = current_dir.join("corpus"); - let home_dir = dirs::home_dir().unwrap(); - let mut loader = Loader::new(home_dir.join(".tree-sitter")); if let Some((language, _)) = loader.language_configuration_at_path(¤t_dir)? 
{ - test::run_tests_at_path(language, &corpus_path)?; + test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)?; } else { eprintln!("No language found"); } } else if let Some(matches) = matches.subcommand_matches("parse") { + let debug = matches.is_present("debug"); + let debug_graph = matches.is_present("debug-graph"); loader.find_parsers(&vec![home_dir.join("github")])?; let source_path = Path::new(matches.value_of("path").unwrap()); if let Some((language, _)) = loader.language_for_file_name(source_path)? { - parse::parse_file_at_path(language, source_path)?; + parse::parse_file_at_path(language, source_path, debug, debug_graph)?; } else { eprintln!("No language found"); } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 26fe9b9a..fde148b6 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,17 +1,41 @@ use super::error::Result; +use super::util; use std::fs; -use std::path::Path; -use tree_sitter::{Language, Parser}; use std::io::{self, Write}; +use std::path::Path; +use tree_sitter::{Language, LogType, Parser}; -pub fn parse_file_at_path(language: Language, path: &Path) -> Result<()> { +pub fn parse_file_at_path( + language: Language, + path: &Path, + debug: bool, + debug_graph: bool, +) -> Result<()> { let mut parser = Parser::new(); parser.set_language(language)?; let source_code = fs::read_to_string(path)?; + + let mut log_session = None; + + if debug_graph { + log_session = Some(util::start_logging_graphs(&mut parser, "log.html")?); + } else if debug { + parser.set_logger(Some(Box::new(|log_type, message| { + if log_type == LogType::Lex { + io::stderr().write(b" ").unwrap(); + } + write!(&mut io::stderr(), "{}\n", message).unwrap(); + }))); + } + let tree = parser .parse_str(&source_code, None) .expect("Incompatible language version"); + if let Some(log_session) = log_session { + util::stop_logging_graphs(&mut parser, log_session)?; + } + let stdout = io::stdout(); let mut stdout = stdout.lock(); let mut cursor = 
tree.walk(); diff --git a/cli/src/test.rs b/cli/src/test.rs index a693576d..790e9ec7 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,14 +1,15 @@ use super::error::Result; +use super::util; use ansi_term::Colour; use difference::{Changeset, Difference}; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; use std::fs; -use std::io; +use std::io::{self, Write}; use std::path::Path; use std::str; -use tree_sitter::{Language, Parser}; +use tree_sitter::{Language, LogType, Parser}; lazy_static! { static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n") @@ -35,15 +36,34 @@ pub enum TestEntry { }, } -pub fn run_tests_at_path(language: Language, path: &Path) -> Result<()> { +pub fn run_tests_at_path( + language: Language, + path: &Path, + debug: bool, + debug_graph: bool, + filter: Option<&str>, +) -> Result<()> { let test_entry = parse_tests(path)?; let mut parser = Parser::new(); parser.set_language(language)?; + let mut log_session = None; + + if debug_graph { + log_session = Some(util::start_logging_graphs(&mut parser, "log.html")?); + } else if debug { + parser.set_logger(Some(Box::new(|log_type, message| { + if log_type == LogType::Lex { + io::stderr().write(b" ").unwrap(); + } + write!(&mut io::stderr(), "{}\n", message).unwrap(); + }))); + } + let mut failures = Vec::new(); if let TestEntry::Group { children, .. 
} = test_entry { for child in children { - run_tests(&mut parser, child, 0, &mut failures)?; + run_tests(&mut parser, child, filter, 0, &mut failures)?; } } @@ -83,28 +103,38 @@ pub fn run_tests_at_path(language: Language, path: &Path) -> Result<()> { } } + if let Some(log_session) = log_session { + util::stop_logging_graphs(&mut parser, log_session)?; + } + Ok(()) } fn run_tests( parser: &mut Parser, test_entry: TestEntry, + filter: Option<&str>, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, ) -> Result<()> { - for _ in 0..indent_level { - print!(" "); - } match test_entry { TestEntry::Example { name, input, output, } => { + if let Some(filter) = filter { + if !name.contains(filter) { + return Ok(()); + } + } let tree = parser .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) .unwrap(); let actual = tree.root_node().to_sexp(); + for _ in 0..indent_level { + print!(" "); + } if actual == output { println!("✓ {}", Colour::Green.paint(&name)); } else { @@ -113,10 +143,13 @@ fn run_tests( } } TestEntry::Group { name, children } => { + for _ in 0..indent_level { + print!(" "); + } println!("{}:", name); indent_level += 1; for child in children { - run_tests(parser, child, indent_level, failures)?; + run_tests(parser, child, filter, indent_level, failures)?; } } } diff --git a/cli/src/util.rs b/cli/src/util.rs new file mode 100644 index 00000000..d7d8572e --- /dev/null +++ b/cli/src/util.rs @@ -0,0 +1,36 @@ +use std::fs::File; +use std::io::{Result, Write}; +use std::process::{Child, ChildStdin, Command, Stdio}; +use std::str; +use tree_sitter::Parser; + +pub(crate) struct LogSession(Child, ChildStdin); + +pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result { + let mut dot_file = File::create(path)?; + dot_file.write(b"\n\n\n")?; + let mut dot_process = Command::new("dot") + .arg("-Tsvg") + .stdin(Stdio::piped()) + .stdout(dot_file) + .spawn() + .expect("Failed to run Dot"); + let dot_stdin = dot_process + 
.stdin + .take() + .expect("Failed to open stdin for Dot"); + parser.print_dot_graphs(&dot_stdin); + Ok(LogSession(dot_process, dot_stdin)) +} + +pub(crate) fn stop_logging_graphs(parser: &mut Parser, mut session: LogSession) -> Result<()> { + drop(session.1); + parser.stop_printing_dot_graphs(); + session.0.wait()?; + + if cfg!(target_os = "macos") { + Command::new("open").arg("log.html").output()?; + } + + Ok(()) +} diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 58d0e510..9d1f3490 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -1,5 +1,6 @@ /* automatically generated by rust-bindgen */ +pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; #[repr(C)] @@ -87,9 +88,9 @@ pub struct TSNode { #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSTreeCursor { - pub context: [u32; 2usize], - pub id: *const ::std::os::raw::c_void, pub tree: *const ::std::os::raw::c_void, + pub id: *const ::std::os::raw::c_void, + pub context: [u32; 2usize], } extern "C" { pub fn ts_parser_new() -> *mut TSParser; @@ -110,7 +111,7 @@ extern "C" { pub fn ts_parser_set_logger(arg1: *mut TSParser, arg2: TSLogger); } extern "C" { - pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: *mut FILE); + pub fn ts_parser_print_dot_graphs(arg1: *mut TSParser, arg2: ::std::os::raw::c_int); } extern "C" { pub fn ts_parser_halt_on_error(arg1: *mut TSParser, arg2: bool); @@ -126,6 +127,15 @@ extern "C" { arg4: u32, ) -> *mut TSTree; } +extern "C" { + pub fn ts_parser_parse_string_encoding( + arg1: *mut TSParser, + arg2: *const TSTree, + arg3: *const ::std::os::raw::c_char, + arg4: u32, + arg5: TSInputEncoding, + ) -> *mut TSTree; +} extern "C" { pub fn ts_parser_enabled(arg1: *const TSParser) -> bool; } @@ -271,19 +281,22 @@ extern "C" { pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); } extern "C" { - pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; + pub fn 
ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode); } extern "C" { - pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; -} -extern "C" { - pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; + pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } extern "C" { pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } extern "C" { - pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; + pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; +} +extern "C" { + pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; } extern "C" { pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; diff --git a/lib/binding/ffi.rs b/lib/binding/ffi.rs index 323609e0..685ed765 100644 --- a/lib/binding/ffi.rs +++ b/lib/binding/ffi.rs @@ -1,4 +1,9 @@ #![allow(dead_code)] #![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] include!("./bindings.rs"); + +extern "C" { + pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; +} diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 65a57d16..ae3c979c 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -6,6 +6,9 @@ extern crate regex; extern crate serde; extern crate serde_json; +#[cfg(unix)] +use std::os::unix::io::AsRawFd; + use regex::Regex; use serde::de::DeserializeOwned; use std::collections::HashMap; @@ -185,6 +188,17 @@ impl Parser { unsafe { ffi::ts_parser_set_logger(self.0, c_logger) }; } + #[cfg(unix)] + pub fn print_dot_graphs(&mut self, file: & impl AsRawFd) { + let fd = file.as_raw_fd(); + unsafe { ffi::ts_parser_print_dot_graphs(self.0, ffi::dup(fd)) } + } + + #[cfg(unix)] + pub fn stop_printing_dot_graphs(&mut self) { + unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) } + } + pub fn parse_str(&mut self, 
input: &str, old_tree: Option<&Tree>) -> Option { let bytes = input.as_bytes(); self.parse_utf8( diff --git a/lib/include/tree_sitter/runtime.h b/lib/include/tree_sitter/runtime.h index f0442477..ab69a0b5 100644 --- a/lib/include/tree_sitter/runtime.h +++ b/lib/include/tree_sitter/runtime.h @@ -83,7 +83,7 @@ const TSLanguage *ts_parser_language(const TSParser *); bool ts_parser_set_language(TSParser *, const TSLanguage *); TSLogger ts_parser_logger(const TSParser *); void ts_parser_set_logger(TSParser *, TSLogger); -void ts_parser_print_dot_graphs(TSParser *, FILE *); +void ts_parser_print_dot_graphs(TSParser *, int); void ts_parser_halt_on_error(TSParser *, bool); TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput); TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t); diff --git a/lib/src/parser.c b/lib/src/parser.c index ef7f612d..a33dbc6f 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1542,8 +1542,16 @@ void ts_parser_set_logger(TSParser *self, TSLogger logger) { self->lexer.logger = logger; } -void ts_parser_print_dot_graphs(TSParser *self, FILE *file) { - self->dot_graph_file = file; +void ts_parser_print_dot_graphs(TSParser *self, int fd) { + if (self->dot_graph_file) { + fclose(self->dot_graph_file); + } + + if (fd >= 0) { + self->dot_graph_file = fdopen(fd, "a"); + } else { + self->dot_graph_file = NULL; + } } void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) { diff --git a/script/bindgen.sh b/script/bindgen.sh index 699f0339..0a536d20 100755 --- a/script/bindgen.sh +++ b/script/bindgen.sh @@ -1,7 +1,7 @@ #!/bin/bash -output_path=src/bindings.rs -header_path='vendor/tree-sitter/include/tree_sitter/runtime.h' +output_path=lib/binding/bindings.rs +header_path='lib/include/tree_sitter/runtime.h' bindgen \ --no-layout-tests \ From 6972a8e3e8ac442a84b2ad48999d58d9b41d4c4c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 9 Jan 2019 09:58:45 -0800 Subject: [PATCH 111/208] Add logging 
when deciding not to merge parse states --- cli/src/generate/build_tables/minimize_parse_table.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 007c9703..a5cb5f81 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -1,6 +1,6 @@ use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; -use crate::generate::grammars::{SyntaxGrammar, VariableType}; +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use crate::generate::rules::{AliasMap, Symbol}; use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use hashbrown::{HashMap, HashSet}; @@ -8,6 +8,7 @@ use hashbrown::{HashMap, HashSet}; pub(crate) fn minimize_parse_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, token_conflict_map: &TokenConflictMap, keywords: &TokenSet, @@ -15,6 +16,7 @@ pub(crate) fn minimize_parse_table( let mut minimizer = Minimizer { parse_table, syntax_grammar, + lexical_grammar, token_conflict_map, keywords, simple_aliases, @@ -27,6 +29,7 @@ pub(crate) fn minimize_parse_table( struct Minimizer<'a> { parse_table: &'a mut ParseTable, syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, token_conflict_map: &'a TokenConflictMap<'a>, keywords: &'a TokenSet, simple_aliases: &'a AliasMap, @@ -237,6 +240,11 @@ impl<'a> Minimizer<'a> { .token_conflict_map .does_match_same_string(token.index, existing_token.index) { + info!( + "can't merge parse states because of conflict between {} and {}", + self.lexical_grammar.variables[token.index].name, + self.lexical_grammar.variables[existing_token.index].name + ); return false; } } From 2e8b2ab8fb988790dfa45e6c173b80647786c4fe Mon Sep 17 00:00:00 2001 From: Max 
Brunsfeld Date: Wed, 9 Jan 2019 09:59:46 -0800 Subject: [PATCH 112/208] Give strings more implicit precedence than immediate tokens --- cli/src/generate/build_tables/mod.rs | 5 ++++- cli/src/generate/prepare_grammar/expand_tokens.rs | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 7d55d0fa..b8432fe5 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -43,12 +43,14 @@ pub(crate) fn build_tables( lexical_grammar, &coincident_token_index, &token_conflict_map, + &keywords, ); mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map); if minimize { minimize_parse_table( &mut parse_table, syntax_grammar, + lexical_grammar, simple_aliases, &token_conflict_map, &keywords, @@ -77,6 +79,7 @@ fn populate_error_state( lexical_grammar: &LexicalGrammar, coincident_token_index: &CoincidentTokenIndex, token_conflict_map: &TokenConflictMap, + keywords: &TokenSet, ) { let state = &mut parse_table.states[0]; let n = lexical_grammar.variables.len(); @@ -112,7 +115,7 @@ fn populate_error_state( // the *conflict-free tokens* identified above. 
for i in 0..n { let symbol = Symbol::terminal(i); - if !conflict_free_tokens.contains(&symbol) { + if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) { if syntax_grammar.word_token != Some(symbol) { if let Some(t) = conflict_free_tokens.iter().find(|t| { !coincident_token_index.contains(symbol, *t) diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index d1922dc0..e269df6d 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -16,10 +16,10 @@ struct NfaBuilder { fn get_implicit_precedence(rule: &Rule) -> i32 { match rule { - Rule::String(_) => 1, + Rule::String(_) => 2, Rule::Metadata { rule, params } => { if params.is_main_token { - get_implicit_precedence(rule) + 2 + get_implicit_precedence(rule) + 1 } else { get_implicit_precedence(rule) } From c0fad8b3c4f477be3aa846b8d7570266ea8d17de Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 9 Jan 2019 14:43:49 -0800 Subject: [PATCH 113/208] Write parser.c in generate command --- cli/src/generate/mod.rs | 10 ++++++---- cli/src/main.rs | 6 +----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 7dfe5a4b..366d2495 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; use crate::error::Result; +use std::fs; use std::io::Write; use std::path::PathBuf; use std::process::{Command, Stdio}; @@ -17,11 +18,11 @@ mod rules; mod tables; pub fn generate_parser_for_grammar( - grammar_path: &PathBuf, + repo_path: &PathBuf, minimize: bool, state_ids_to_log: Vec, -) -> Result { - let grammar_json = load_js_grammar_file(grammar_path); +) -> Result<()> { + let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js")); let input_grammar = 
parse_grammar(&grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; @@ -43,7 +44,8 @@ pub fn generate_parser_for_grammar( lexical_grammar, simple_aliases, ); - Ok(c_code) + fs::write(repo_path.join("src").join("parser.c"), c_code)?; + Ok(()) } fn load_js_grammar_file(grammar_path: &PathBuf) -> String { diff --git a/cli/src/main.rs b/cli/src/main.rs index 8dbf345a..604d3068 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -83,11 +83,7 @@ fn run() -> error::Result<()> { ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) .collect() }); - let grammar_path = current_dir.join("grammar.js"); - let code = - generate::generate_parser_for_grammar(&grammar_path, minimize, state_ids_to_log)?; - println!("{}", code); - return Ok(()); + generate::generate_parser_for_grammar(¤t_dir, minimize, state_ids_to_log)?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); From 6bd550ca87f6ab4489e8ce1a6b46458ab6afbabe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 9 Jan 2019 18:09:55 -0800 Subject: [PATCH 114/208] Start work on property sheet compilation --- Cargo.lock | 142 ++++++++++++++ cli/Cargo.toml | 1 + cli/src/error.rs | 6 + cli/src/generate/mod.rs | 51 ++--- cli/src/generate/properties.rs | 327 +++++++++++++++++++++++++++++++++ cli/src/main.rs | 5 +- lib/binding/lib.rs | 48 ++--- 7 files changed, 532 insertions(+), 48 deletions(-) create mode 100644 cli/src/generate/properties.rs diff --git a/Cargo.lock b/Cargo.lock index db86e43b..fa7712ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,11 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "autocfg" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "backtrace" version = "0.3.9" @@ -336,6 +341,36 @@ name = "nodrop" 
version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "nom" +version = "4.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-integer" +version = "0.1.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-rational" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "owning_ref" version = "0.4.0" @@ -408,6 +443,32 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + 
+[[package]] +name = "rand_chacha" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rand_core" version = "0.2.2" @@ -421,6 +482,60 @@ name = "rand_core" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "rand_hc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_isaac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_os" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_pcg" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_xorshift" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "redox_syscall" version = "0.1.43" @@ -465,6 +580,18 @@ dependencies = [ "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rsass" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "nom 4.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rusqlite" version = "0.14.0" @@ -657,6 +784,7 @@ dependencies = [ "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", @@ -756,6 +884,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3f67b0b6a86dae6e67ff4ca2b6201396074996379fba2b92ff649126f37cb392" "checksum arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f405cc4c21cd8b784f6c8fc2adf9bc00f59558f0049b5ec21517f875963040cc" "checksum atty 0.2.11 
(registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" +"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" "checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" @@ -793,6 +922,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" +"checksum nom 4.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9c349f68f25f596b9f44cf0e7c69752a5c633b0550c3ff849518bfba0233774a" +"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" +"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" +"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" "checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" "checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" "checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c" @@ -801,13 +934,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" "checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" "checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" +"checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" +"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" "checksum rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1961a422c4d189dfb50ffa9320bf1f2a9bd54ecb92792fb9477f99a1045f3372" "checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" +"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" +"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" +"checksum rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" +"checksum rand_pcg 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" +"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" +"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" "checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26" "checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" "checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" +"checksum rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7a5dde55023a6c19470f7aeb59f75f897d8b80cbe00d61dfcaf7bbbe3de4c0a6" "checksum rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c9d9118f1ce84d8d0b67f9779936432fb42bb620cef2122409d786892cce9a3c" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a2f546c4..2eabd88f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -23,6 +23,7 @@ serde = "1.0" serde_derive = "1.0" 
regex-syntax = "0.6.4" regex = "1" +rsass = "0.9" [dependencies.tree-sitter] path = "../lib" diff --git a/cli/src/error.rs b/cli/src/error.rs index 1b8b1a79..4769b481 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -31,6 +31,12 @@ impl From for Error { } } +impl From for Error { + fn from(error: rsass::Error) -> Self { + Error(error.to_string()) + } +} + impl From for Error { fn from(error: String) -> Self { Error(error) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 366d2495..0899d793 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -13,6 +13,7 @@ mod grammars; mod nfa; mod parse_grammar; mod prepare_grammar; +mod properties; mod render; mod rules; mod tables; @@ -21,30 +22,34 @@ pub fn generate_parser_for_grammar( repo_path: &PathBuf, minimize: bool, state_ids_to_log: Vec, + properties_only: bool, ) -> Result<()> { - let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js")); - let input_grammar = parse_grammar(&grammar_json)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(&input_grammar)?; - let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - &inlines, - minimize, - state_ids_to_log, - )?; - let c_code = render_c_code( - &input_grammar.name, - parse_table, - main_lex_table, - keyword_lex_table, - keyword_capture_token, - syntax_grammar, - lexical_grammar, - simple_aliases, - ); - fs::write(repo_path.join("src").join("parser.c"), c_code)?; + if !properties_only { + let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js")); + let input_grammar = parse_grammar(&grammar_json)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(&input_grammar)?; + let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &inlines, + minimize, + 
state_ids_to_log, + )?; + let c_code = render_c_code( + &input_grammar.name, + parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + syntax_grammar, + lexical_grammar, + simple_aliases, + ); + fs::write(repo_path.join("src").join("parser.c"), c_code)?; + } + properties::generate_property_sheets(repo_path)?; Ok(()) } diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs new file mode 100644 index 00000000..c328526f --- /dev/null +++ b/cli/src/generate/properties.rs @@ -0,0 +1,327 @@ +use crate::error::{Error, Result}; +use rsass; +use rsass::sass::Value; +use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque}; +use std::fmt; +use std::fmt::Write; +use std::fs::{self, File}; +use std::hash::{Hash, Hasher}; +use std::path::{Path, PathBuf}; +use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON}; + +#[derive(Debug, PartialEq, Eq, Hash, Serialize)] +#[serde(untagged)] +enum PropertyValue { + String(String), + Object(PropertySet), + Array(Vec), +} + +type PropertySet = BTreeMap; +type PropertySheetJSON = tree_sitter::PropertySheetJSON; +type StateId = u32; +type PropertySetId = u32; + +#[derive(Clone, PartialEq, Eq)] +struct SelectorStep { + kind: String, + is_named: bool, + is_immediate: bool, + child_index: Option, + text_pattern: Option, +} + +#[derive(PartialEq, Eq)] +struct Selector(Vec); + +#[derive(Debug, PartialEq, Eq)] +struct Rule { + selectors: Vec, + properties: PropertySet, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct PropertyItem { + rule_id: u32, + selector_id: u32, + step_id: u32, +} + +#[derive(PartialEq, Eq)] +struct PropertyItemSet(BTreeSet); + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +struct SelectorMatch { + specificity: u32, + rule_id: u32, + selector_id: u32, +} + +struct Builder { + rules: Vec, + output: PropertySheetJSON, + ids_by_item_set: HashMap, + ids_by_property_set: HashMap, + item_set_queue: VecDeque<(PropertyItemSet, StateId)>, +} + +impl 
Builder { + fn new(rules: Vec) -> Self { + Builder { + rules, + output: PropertySheetJSON { + states: Vec::new(), + property_sets: Vec::new(), + }, + ids_by_item_set: HashMap::new(), + ids_by_property_set: HashMap::new(), + item_set_queue: VecDeque::new(), + } + } + + fn build(self) -> PropertySheetJSON { + let mut start_item_set = PropertyItemSet(BTreeSet::new()); + + self.output + } +} + +impl Hash for PropertyItemSet { + fn hash(&self, h: &mut H) { + h.write_usize(self.0.len()); + for entry in &self.0 { + entry.hash(h); + } + } +} + +impl fmt::Debug for SelectorStep { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "(")?; + if self.is_named { + write!(f, "{}", self.kind)?; + } else { + write!(f, "\"{}\"", self.kind)?; + } + if let Some(n) = self.child_index { + write!(f, ":nth-child({})", n)?; + } + if let Some(t) = &self.text_pattern { + write!(f, "[text='{}']", t)?; + } + write!(f, ")")?; + Ok(()) + } +} + +impl fmt::Debug for Selector { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "[")?; + for (i, step) in self.0.iter().enumerate() { + if step.is_immediate { + write!(f, " > ")?; + } else if i > 0 { + write!(f, " ")?; + } + write!(f, "{:?}", step)?; + } + write!(f, "]")?; + Ok(()) + } +} + +pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { + let src_dir_path = repo_path.join("src"); + let properties_dir_path = repo_path.join("properties"); + + for entry in fs::read_dir(properties_dir_path)? 
{ + let property_sheet_css_path = entry?.path(); + let rules = parse_property_sheet(&property_sheet_css_path)?; + + for rule in &rules { + eprintln!("rule: {:?}", rule); + } + + let sheet = Builder::new(rules).build(); + let property_sheet_json_path = src_dir_path + .join(property_sheet_css_path.file_name().unwrap()) + .with_extension("json"); + let mut property_sheet_json_file = File::create(property_sheet_json_path)?; + serde_json::to_writer_pretty(&mut property_sheet_json_file, &sheet)?; + } + + Ok(()) +} + +fn parse_property_sheet(path: &Path) -> Result> { + let mut i = 0; + let mut items = rsass::parse_scss_file(path)?; + while i < items.len() { + match &items[i] { + rsass::Item::Import(arg) => { + if let Some(s) = get_sass_string(arg) { + let import_path = resolve_path(path, s)?; + let imported_items = rsass::parse_scss_file(&import_path)?; + items.splice(i..(i + 1), imported_items); + continue; + } else { + return Err(Error("@import arguments must be strings".to_string())); + } + } + rsass::Item::AtRule { name, args, .. 
} => match name.as_str() { + "schema" => { + if let Some(s) = get_sass_string(args) { + let schema_path = resolve_path(path, s)?; + eprintln!("schema path: {:?}", schema_path); + items.remove(i); + continue; + } else { + return Err(Error("@schema arguments must be strings".to_string())); + } + } + _ => return Err(Error(format!("Unsupported at-rule '{}'", name))), + }, + _ => {} + } + i += 1; + } + + let mut result = Vec::new(); + let selector_prefixes = vec![Vec::new()]; + parse_sass_items(items, &selector_prefixes, &mut result)?; + Ok(result) +} + +fn parse_sass_items( + items: Vec, + selector_prefixes: &Vec>, + result: &mut Vec, +) -> Result<()> { + let mut properties = PropertySet::new(); + for item in items { + match item { + rsass::Item::None | rsass::Item::Comment(_) => {} + rsass::Item::Property(name, value) => { + properties.insert(name.to_string(), parse_sass_value(&value)?); + } + rsass::Item::Rule(selectors, items) => { + let mut full_selectors = Vec::new(); + for prefix in selector_prefixes { + let mut part_string = String::new(); + let mut next_step_is_immediate = false; + for selector in &selectors.s { + let mut prefix = prefix.clone(); + for part in &selector.0 { + part_string.clear(); + write!(&mut part_string, "{}", part).unwrap(); + let part_string = part_string.trim(); + if !part_string.is_empty() { + if part_string == "&" { + continue; + } else if part_string.starts_with("[text=") { + if let Some(last_step) = prefix.last_mut() { + last_step.text_pattern = Some( + part_string[7..(part_string.len() - 2)].to_string(), + ) + } + } else if part_string == ">" { + next_step_is_immediate = true; + } else if part_string.starts_with("[token=") { + prefix.push(SelectorStep { + kind: part_string[8..(part_string.len() - 2)].to_string(), + is_named: false, + child_index: None, + text_pattern: None, + is_immediate: next_step_is_immediate, + }); + next_step_is_immediate = false; + } else { + prefix.push(SelectorStep { + kind: part_string.to_string(), + 
is_named: true, + child_index: None, + text_pattern: None, + is_immediate: next_step_is_immediate, + }); + next_step_is_immediate = false; + } + } + } + full_selectors.push(prefix); + } + } + parse_sass_items(items, &full_selectors, result)?; + } + _ => return Err(Error(format!("Unsupported syntax type {:?}", item))), + } + } + + if !properties.is_empty() { + result.push(Rule { + selectors: selector_prefixes.iter().cloned().map(Selector).collect(), + properties, + }); + } + + Ok(()) +} + +fn parse_sass_value(value: &Value) -> Result { + match value { + Value::Literal(s) => { + if let Some(s) = s.single_raw() { + Ok(PropertyValue::String(s.to_string())) + } else { + Err(Error("String interpolation is not supported".to_string())) + } + } + Value::Call(name, raw_args) => { + if let Some(name) = name.single_raw() { + let mut args = Vec::new(); + for (_, arg) in raw_args.iter() { + args.push(parse_sass_value(arg)?); + } + let mut result = PropertySet::new(); + result.insert("name".to_string(), PropertyValue::String(name.to_string())); + result.insert("args".to_string(), PropertyValue::Array(args)); + Ok(PropertyValue::Object(result)) + } else { + Err(Error("String interpolation is not supported".to_string())) + } + } + Value::List(elements, ..) 
=> { + let mut result = Vec::new(); + for element in elements { + result.push(parse_sass_value(element)?); + } + Ok(PropertyValue::Array(result)) + } + Value::True => Ok(PropertyValue::String("true".to_string())), + Value::False => Ok(PropertyValue::String("false".to_string())), + _ => Err(Error( + "Property values must be strings or function calls".to_string(), + )), + } +} + +fn get_sass_string(value: &Value) -> Option<&str> { + if let Value::Literal(s) = value { + s.single_raw() + } else { + None + } +} + +fn resolve_path(base: &Path, path: impl AsRef) -> Result { + let mut result = base.to_owned(); + result.pop(); + result.push(path.as_ref()); + if result.exists() { + Ok(result) + } else { + Err(Error(format!( + "Could not resolve import path {:?}", + path.as_ref() + ))) + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs index 604d3068..334f06ef 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -6,6 +6,7 @@ extern crate log; extern crate serde_derive; extern crate hashbrown; extern crate regex; +extern crate rsass; extern crate serde_json; mod error; @@ -39,6 +40,7 @@ fn run() -> error::Result<()> { SubCommand::with_name("generate") .about("Generate a parser") .arg(Arg::with_name("log").long("log")) + .arg(Arg::with_name("properties-only").long("properties")) .arg( Arg::with_name("state-ids-to-log") .long("log-state") @@ -77,13 +79,14 @@ fn run() -> error::Result<()> { } let minimize = !matches.is_present("no-minimize"); + let properties_only = matches.is_present("properties-only"); let state_ids_to_log = matches .values_of("state-ids-to-log") .map_or(Vec::new(), |ids| { ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) .collect() }); - generate::generate_parser_for_grammar(¤t_dir, minimize, state_ids_to_log)?; + generate::generate_parser_for_grammar(¤t_dir, minimize, state_ids_to_log, properties_only)?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = 
matches.is_present("debug-graph"); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index ae3c979c..37748447 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -80,6 +80,29 @@ pub struct PropertySheet

> { text_regexes: Vec, } +#[derive(Debug, Deserialize, Serialize)] +pub struct PropertyTransitionJSON { + #[serde(rename = "type")] + pub kind: String, + pub named: bool, + pub index: Option, + pub text: Option, + pub state_id: usize, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct PropertyStateJSON { + pub transitions: Vec, + pub property_set_id: usize, + pub default_next_state_id: usize, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct PropertySheetJSON

{ + pub states: Vec, + pub property_sets: Vec

, +} + #[derive(Clone, Copy)] pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); @@ -189,7 +212,7 @@ impl Parser { } #[cfg(unix)] - pub fn print_dot_graphs(&mut self, file: & impl AsRawFd) { + pub fn print_dot_graphs(&mut self, file: &impl AsRawFd) { let fd = file.as_raw_fd(); unsafe { ffi::ts_parser_print_dot_graphs(self.0, ffi::dup(fd)) } } @@ -754,29 +777,6 @@ impl

PropertySheet

{ where P: DeserializeOwned, { - #[derive(Deserialize, Debug)] - struct PropertyTransitionJSON { - #[serde(rename = "type")] - kind: String, - named: bool, - index: Option, - text: Option, - state_id: usize, - } - - #[derive(Deserialize, Debug)] - struct PropertyStateJSON { - transitions: Vec, - property_set_id: usize, - default_next_state_id: usize, - } - - #[derive(Deserialize, Debug)] - struct PropertySheetJSON

{ - states: Vec, - property_sets: Vec

, - } - let input: PropertySheetJSON

= serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?; let mut states = Vec::new(); From 1dfbe495ed55e35e4f94a0eb8e19de65edc2e414 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 13:12:16 -0800 Subject: [PATCH 115/208] Get property sheet compilation working --- cli/src/generate/properties.rs | 540 ++++++++++++++++++++++++++++++--- lib/binding/lib.rs | 9 +- 2 files changed, 504 insertions(+), 45 deletions(-) diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index c328526f..136cd725 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -1,15 +1,17 @@ use crate::error::{Error, Result}; +use hashbrown::hash_map::{Entry, HashMap}; +use hashbrown::HashSet; use rsass; use rsass::sass::Value; -use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque}; -use std::fmt; -use std::fmt::Write; +use std::cmp::Ordering; +use std::collections::{BTreeMap, VecDeque}; +use std::fmt::{self, Write}; use std::fs::{self, File}; -use std::hash::{Hash, Hasher}; +use std::io::BufWriter; use std::path::{Path, PathBuf}; use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON}; -#[derive(Debug, PartialEq, Eq, Hash, Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[serde(untagged)] enum PropertyValue { String(String), @@ -17,17 +19,17 @@ enum PropertyValue { Array(Vec), } -type PropertySet = BTreeMap; +type PropertySet = std::collections::HashMap; type PropertySheetJSON = tree_sitter::PropertySheetJSON; -type StateId = u32; -type PropertySetId = u32; +type StateId = usize; +type PropertySetId = usize; #[derive(Clone, PartialEq, Eq)] struct SelectorStep { kind: String, is_named: bool, is_immediate: bool, - child_index: Option, + child_index: Option, text_pattern: Option, } @@ -40,29 +42,48 @@ struct Rule { properties: PropertySet, } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -struct PropertyItem { +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, 
Hash)] +struct Item { rule_id: u32, selector_id: u32, step_id: u32, } -#[derive(PartialEq, Eq)] -struct PropertyItemSet(BTreeSet); +#[derive(Clone, PartialEq, Eq, Hash)] +struct ItemSet(Vec); #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] struct SelectorMatch { specificity: u32, rule_id: u32, - selector_id: u32, } struct Builder { rules: Vec, output: PropertySheetJSON, - ids_by_item_set: HashMap, - ids_by_property_set: HashMap, - item_set_queue: VecDeque<(PropertyItemSet, StateId)>, + ids_by_item_set: HashMap, + item_set_queue: VecDeque<(ItemSet, StateId)>, +} + +impl ItemSet { + fn new() -> Self { + ItemSet(Vec::new()) + } + + fn insert(&mut self, item: Item) { + match self.0.binary_search(&item) { + Err(i) => self.0.insert(i, item), + _ => {} + } + } +} + +impl<'a> IntoIterator for &'a ItemSet { + type Item = &'a Item; + type IntoIter = std::slice::Iter<'a, Item>; + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } } impl Builder { @@ -74,25 +95,301 @@ impl Builder { property_sets: Vec::new(), }, ids_by_item_set: HashMap::new(), - ids_by_property_set: HashMap::new(), item_set_queue: VecDeque::new(), } } - fn build(self) -> PropertySheetJSON { - let mut start_item_set = PropertyItemSet(BTreeSet::new()); + fn build(mut self) -> PropertySheetJSON { + let mut start_item_set = ItemSet::new(); + for (i, rule) in self.rules.iter().enumerate() { + for j in 0..rule.selectors.len() { + start_item_set.insert(Item { + rule_id: i as u32, + selector_id: j as u32, + step_id: 0, + }); + } + } + + self.add_state(start_item_set); + while let Some((item_set, state_id)) = self.item_set_queue.pop_front() { + self.populate_state(item_set, state_id); + } + + self.remove_duplicate_states(); + + for (i, state) in self.output.states.iter_mut().enumerate() { + state.id = i; + } self.output } -} -impl Hash for PropertyItemSet { - fn hash(&self, h: &mut H) { - h.write_usize(self.0.len()); - for entry in &self.0 { - entry.hash(h); + fn add_state(&mut self, item_set: ItemSet) -> 
StateId { + match self.ids_by_item_set.entry(item_set) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let state_id = self.output.states.len(); + self.output.states.push(PropertyStateJSON { + id: 0, + transitions: Vec::new(), + property_set_id: 0, + default_next_state_id: 0, + }); + self.item_set_queue.push_back((v.key().clone(), state_id)); + v.insert(state_id); + state_id + } } } + + fn add_property_set(&mut self, properties: PropertySet) -> PropertySetId { + if let Some(index) = self + .output + .property_sets + .iter() + .position(|i| *i == properties) + { + index + } else { + self.output.property_sets.push(properties); + self.output.property_sets.len() - 1 + } + } + + fn populate_state(&mut self, item_set: ItemSet, state_id: StateId) { + let mut transition_map: HashSet = HashSet::new(); + let mut selector_matches = Vec::new(); + + // First, compute all of the possible state transition predicates for + // this state, and all of the rules that are currently matching. + for item in &item_set { + let rule = &self.rules[item.rule_id as usize]; + let selector = &rule.selectors[item.selector_id as usize]; + let next_step = selector.0.get(item.step_id as usize); + + // If this item has more elements remaining in its selector, then + // add a state transition based on the next step. + if let Some(step) = next_step { + transition_map.insert(PropertyTransitionJSON { + kind: step.kind.clone(), + named: step.is_named, + index: step.child_index, + text: step.text_pattern.clone(), + state_id: 0, + }); + } + // If the item has matched its entire selector, then the item's + // properties are applicable to this state. + else { + selector_matches.push(SelectorMatch { + rule_id: item.rule_id, + specificity: selector_specificity(selector), + }); + } + } + + // For eacy possible state transition, compute the set of items in that transition's + // destination state. 
+ let mut transition_list: Vec<(PropertyTransitionJSON, u32)> = transition_map + .into_iter() + .map(|mut transition| { + let mut next_item_set = ItemSet::new(); + let mut latest_matching_rule_id = 0; + for item in &item_set { + let rule = &self.rules[item.rule_id as usize]; + let selector = &rule.selectors[item.selector_id as usize]; + let next_step = selector.0.get(item.step_id as usize); + + if let Some(step) = next_step { + // If the next step of the item's selector satisfies this transition, + // advance the item to the next part of its selector and add the + // resulting item to this transition's destination state. + if step_matches_transition(step, &transition) { + let next_item = Item { + rule_id: item.rule_id, + selector_id: item.selector_id, + step_id: item.step_id + 1, + }; + + next_item_set.insert(next_item); + + // If the next item is at the end of its selector, record its rule id + // so that the rule id can be used when sorting this state's transitions. + if selector.0.get(item.step_id as usize + 1).is_none() + && item.rule_id > latest_matching_rule_id + { + latest_matching_rule_id = item.rule_id; + } + } + + // If the next step of the item is not an immediate child, then + // include this item in this transition's destination state, because + // the next step of the item might match a descendant node. + if !step.is_immediate { + next_item_set.insert(*item); + } + } + } + + transition.state_id = self.add_state(next_item_set); + (transition, latest_matching_rule_id) + }) + .collect(); + + // Ensure that for a given node type, more specific transitions are tried + // first, and in the event of a tie, transitions corresponding to later rules + // in the cascade are tried first. 
+ transition_list.sort_by(|a, b| { + let result = a.0.kind.cmp(&b.0.kind); + if result != Ordering::Equal { + return result; + } + let result = a.0.named.cmp(&b.0.named); + if result != Ordering::Equal { + return result; + } + let result = transition_specificity(&b.0).cmp(&transition_specificity(&a.0)); + if result != Ordering::Equal { + return result; + } + b.1.cmp(&a.1) + }); + + // Compute the merged properties that apply in the current state. + // Sort the matching property sets by ascending specificity and by + // their order in the sheet. This way, more specific selectors and later + // rules will override less specific selectors and earlier rules. + let mut properties = PropertySet::new(); + selector_matches.sort_unstable_by(|a, b| { + let result = a.specificity.cmp(&b.specificity); + if result != Ordering::Equal { + return result; + } + a.rule_id.cmp(&b.rule_id) + }); + selector_matches.dedup(); + for selector_match in selector_matches { + let rule = &self.rules[selector_match.rule_id as usize]; + for (property, value) in &rule.properties { + properties.insert(property.clone(), value.clone()); + } + } + + // Compute the default successor item set - the item set that + // we should advance to if the next element doesn't match any + // of the next elements in the item set's selectors. 
+ let mut default_next_item_set = ItemSet::new(); + for item in &item_set { + let rule = &self.rules[item.rule_id as usize]; + let selector = &rule.selectors[item.selector_id as usize]; + let next_step = selector.0.get(item.step_id as usize); + if let Some(step) = next_step { + if !step.is_immediate { + default_next_item_set.insert(*item); + } + } + } + + self.output.states[state_id].default_next_state_id = self.add_state(default_next_item_set); + self.output.states[state_id].property_set_id = self.add_property_set(properties); + self.output.states[state_id] + .transitions + .extend(transition_list.into_iter().map(|i| i.0)); + } + + fn remove_duplicate_states(&mut self) { + let mut state_replacements = BTreeMap::new(); + let mut done = false; + while !done { + done = true; + for (i, state_i) in self.output.states.iter().enumerate() { + if state_replacements.contains_key(&i) { + continue; + } + for (j, state_j) in self.output.states.iter().enumerate() { + if j == i { + break; + } + if state_replacements.contains_key(&j) { + continue; + } + if state_i == state_j { + info!("replace state {} with state {}", i, j); + state_replacements.insert(i, j); + done = false; + break; + } + } + } + for state in self.output.states.iter_mut() { + for transition in state.transitions.iter_mut() { + if let Some(replacement) = state_replacements.get(&transition.state_id) { + transition.state_id = *replacement; + } + } + } + } + + let final_state_replacements = (0..self.output.states.len()) + .into_iter() + .map(|state_id| { + let replacement = state_replacements + .get(&state_id) + .cloned() + .unwrap_or(state_id); + let prior_removed = state_replacements + .iter() + .take_while(|i| *i.0 < replacement) + .count(); + replacement - prior_removed + }) + .collect::>(); + + for state in self.output.states.iter_mut() { + for transition in state.transitions.iter_mut() { + transition.state_id = final_state_replacements[transition.state_id]; + } + } + + let mut i = 0; + 
self.output.states.retain(|_| { + let result = !state_replacements.contains_key(&i); + i += 1; + result + }); + } +} + +fn selector_specificity(selector: &Selector) -> u32 { + let mut result = selector.0.len() as u32; + for step in &selector.0 { + if step.child_index.is_some() { + result += 1; + } + if step.text_pattern.is_some() { + result += 1; + } + } + result +} + +fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 { + let mut result = 0; + if transition.index.is_some() { + result += 1; + } + if transition.text.is_some() { + result += 1; + } + result +} + +fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool { + step.kind == transition.kind + && step.is_named == transition.named + && (step.child_index == transition.index || step.child_index.is_none()) + && (step.text_pattern == transition.text || step.text_pattern.is_none()) } impl fmt::Debug for SelectorStep { @@ -135,27 +432,28 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { let properties_dir_path = repo_path.join("properties"); for entry in fs::read_dir(properties_dir_path)? 
{ - let property_sheet_css_path = entry?.path(); - let rules = parse_property_sheet(&property_sheet_css_path)?; - - for rule in &rules { - eprintln!("rule: {:?}", rule); - } - - let sheet = Builder::new(rules).build(); + let css_path = entry?.path(); + let css = fs::read_to_string(&css_path)?; + let sheet = generate_property_sheet(&css_path, &css)?; let property_sheet_json_path = src_dir_path - .join(property_sheet_css_path.file_name().unwrap()) + .join(css_path.file_name().unwrap()) .with_extension("json"); - let mut property_sheet_json_file = File::create(property_sheet_json_path)?; - serde_json::to_writer_pretty(&mut property_sheet_json_file, &sheet)?; + let property_sheet_json_file = File::create(property_sheet_json_path)?; + let mut writer = BufWriter::new(property_sheet_json_file); + serde_json::to_writer_pretty(&mut writer, &sheet)?; } Ok(()) } -fn parse_property_sheet(path: &Path) -> Result> { +fn generate_property_sheet(path: impl AsRef, css: &str) -> Result { + let rules = parse_property_sheet(path.as_ref(), &css)?; + Ok(Builder::new(rules).build()) +} + +fn parse_property_sheet(path: &Path, css: &str) -> Result> { let mut i = 0; - let mut items = rsass::parse_scss_file(path)?; + let mut items = rsass::parse_scss_data(css.as_bytes())?; while i < items.len() { match &items[i] { rsass::Item::Import(arg) => { @@ -296,11 +594,14 @@ fn parse_sass_value(value: &Value) -> Result { } Ok(PropertyValue::Array(result)) } + Value::Color(_, Some(name)) => Ok(PropertyValue::String(name.clone())), + Value::Numeric(n, _) => Ok(PropertyValue::String(format!("{}", n))), Value::True => Ok(PropertyValue::String("true".to_string())), Value::False => Ok(PropertyValue::String("false".to_string())), - _ => Err(Error( - "Property values must be strings or function calls".to_string(), - )), + _ => Err(Error(format!( + "Property values must be strings or function calls. 
Got {:?}", + value + ))), } } @@ -325,3 +626,158 @@ fn resolve_path(base: &Path, path: impl AsRef) -> Result { ))) } } + +#[cfg(test)] +mod tests { + use super::*; + use regex::Regex; + + #[test] + fn test_immediate_child_and_descendant_selectors() { + let sheet = generate_property_sheet( + "foo", + " + f1 { + color: red; + + & > f2 { + color: green; + } + + & f3 { + color: blue; + } + } + + f2 { + color: indigo; + height: 2; + } + + f3 { + color: violet; + height: 3; + } + ", + ) + .unwrap(); + + // f1 single-element selector + assert_eq!( + *query_simple(&sheet, vec!["f1"]), + props(&[("color", "red")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f2", "f1"]), + props(&[("color", "red")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f2", "f3", "f1"]), + props(&[("color", "red")]) + ); + + // f2 single-element selector + assert_eq!( + *query_simple(&sheet, vec!["f2"]), + props(&[("color", "indigo"), ("height", "2")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f2", "f2"]), + props(&[("color", "indigo"), ("height", "2")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f1", "f3", "f2"]), + props(&[("color", "indigo"), ("height", "2")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f1", "f6", "f2"]), + props(&[("color", "indigo"), ("height", "2")]) + ); + + // f3 single-element selector + assert_eq!( + *query_simple(&sheet, vec!["f3"]), + props(&[("color", "violet"), ("height", "3")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f2", "f3"]), + props(&[("color", "violet"), ("height", "3")]) + ); + + // f2 child selector + assert_eq!( + *query_simple(&sheet, vec!["f1", "f2"]), + props(&[("color", "green"), ("height", "2")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f2", "f1", "f2"]), + props(&[("color", "green"), ("height", "2")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f3", "f1", "f2"]), + props(&[("color", "green"), ("height", "2")]) + ); + + // f3 descendant selector + assert_eq!( + *query_simple(&sheet, vec!["f1", "f3"]), + 
props(&[("color", "blue"), ("height", "3")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f1", "f2", "f3"]), + props(&[("color", "blue"), ("height", "3")]) + ); + assert_eq!( + *query_simple(&sheet, vec!["f1", "f6", "f7", "f8", "f3"]), + props(&[("color", "blue"), ("height", "3")]) + ); + + // no match + assert_eq!( + *query_simple(&sheet, vec!["f1", "f3", "f4"]), + props(&[]) + ); + } + + fn query_simple<'a>( + sheet: &'a PropertySheetJSON, + node_stack: Vec<&'static str>, + ) -> &'a PropertySet { + query( + sheet, + node_stack.into_iter().map(|s| (s, true, 0)).collect(), + "", + ) + } + + fn query<'a>( + sheet: &'a PropertySheetJSON, + node_stack: Vec<(&'static str, bool, usize)>, + leaf_text: &str, + ) -> &'a PropertySet { + let mut state_id = 0; + for (kind, is_named, child_index) in node_stack { + let state = &sheet.states[state_id]; + state_id = state + .transitions + .iter() + .find(|transition| { + transition.kind == kind + && transition.named == is_named + && transition.index.map_or(true, |index| index == child_index) + && (transition + .text + .as_ref() + .map_or(true, |text| Regex::new(text).unwrap().is_match(leaf_text))) + }) + .map_or(state.default_next_state_id, |t| t.state_id); + } + &sheet.property_sets[sheet.states[state_id].property_set_id] + } + + fn props<'a>(s: &'a [(&'a str, &'a str)]) -> PropertySet { + s.into_iter() + .map(|(a, b)| (a.to_string(), PropertyValue::String(b.to_string()))) + .collect() + } +} diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 37748447..88cc24be 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -80,20 +80,23 @@ pub struct PropertySheet

> { text_regexes: Vec, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Deserialize, Serialize, Hash, PartialEq, Eq)] pub struct PropertyTransitionJSON { #[serde(rename = "type")] pub kind: String, pub named: bool, + #[serde(skip_serializing_if = "Option::is_none")] pub index: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub text: Option, pub state_id: usize, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct PropertyStateJSON { - pub transitions: Vec, + pub id: usize, pub property_set_id: usize, + pub transitions: Vec, pub default_next_state_id: usize, } From 38417fc8a1e04bae2d2b0c14f3aa2c310bdec264 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 15:17:38 -0800 Subject: [PATCH 116/208] Port over remaining tests about property sheet generation --- cli/src/generate/properties.rs | 239 ++++++++++++++++++++++++++++++--- 1 file changed, 223 insertions(+), 16 deletions(-) diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index 136cd725..9e570a99 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -1,10 +1,9 @@ use crate::error::{Error, Result}; -use hashbrown::hash_map::{Entry, HashMap}; -use hashbrown::HashSet; use rsass; use rsass::sass::Value; use std::cmp::Ordering; -use std::collections::{BTreeMap, VecDeque}; +use std::collections::hash_map::Entry; +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::{self, Write}; use std::fs::{self, File}; use std::io::BufWriter; @@ -19,7 +18,7 @@ enum PropertyValue { Array(Vec), } -type PropertySet = std::collections::HashMap; +type PropertySet = HashMap; type PropertySheetJSON = tree_sitter::PropertySheetJSON; type StateId = usize; type PropertySetId = usize; @@ -448,6 +447,9 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { fn generate_property_sheet(path: impl AsRef, css: &str) -> Result { let rules = 
parse_property_sheet(path.as_ref(), &css)?; + for rule in &rules { + eprintln!("rule {:?}", rule); + } Ok(Builder::new(rules).build()) } @@ -470,7 +472,6 @@ fn parse_property_sheet(path: &Path, css: &str) -> Result> { "schema" => { if let Some(s) = get_sass_string(args) { let schema_path = resolve_path(path, s)?; - eprintln!("schema path: {:?}", schema_path); items.remove(i); continue; } else { @@ -500,7 +501,22 @@ fn parse_sass_items( match item { rsass::Item::None | rsass::Item::Comment(_) => {} rsass::Item::Property(name, value) => { - properties.insert(name.to_string(), parse_sass_value(&value)?); + let value = parse_sass_value(&value)?; + match properties.entry(name.to_string()) { + Entry::Vacant(v) => { + v.insert(value); + } + Entry::Occupied(mut o) => { + let existing_value = o.get_mut(); + if let PropertyValue::Array(items) = existing_value { + items.push(value); + continue; + } else { + let v = existing_value.clone(); + *existing_value = PropertyValue::Array(vec![v, value]); + } + } + } } rsass::Item::Rule(selectors, items) => { let mut full_selectors = Vec::new(); @@ -516,6 +532,15 @@ fn parse_sass_items( if !part_string.is_empty() { if part_string == "&" { continue; + } else if part_string.starts_with(":nth-child(") { + if let Some(last_step) = prefix.last_mut() { + if let Ok(index) = usize::from_str_radix( + &part_string[11..(part_string.len() - 1)], + 10, + ) { + last_step.child_index = Some(index); + } + } } else if part_string.starts_with("[text=") { if let Some(last_step) = prefix.last_mut() { last_step.text_pattern = Some( @@ -613,18 +638,30 @@ fn get_sass_string(value: &Value) -> Option<&str> { } } -fn resolve_path(base: &Path, path: impl AsRef) -> Result { +fn resolve_path(base: &Path, p: &str) -> Result { + let path = Path::new(p); let mut result = base.to_owned(); result.pop(); - result.push(path.as_ref()); - if result.exists() { - Ok(result) + if path.starts_with(".") { + result.push(path); + if result.exists() { + return Ok(result); + } } 
else { - Err(Error(format!( - "Could not resolve import path {:?}", - path.as_ref() - ))) + loop { + result.push("node_modules"); + result.push(path); + if result.exists() { + return Ok(result); + } + result.pop(); + result.pop(); + if !result.pop() { + break; + } + } } + Err(Error(format!("Could not resolve import path `{}`", p))) } #[cfg(test)] @@ -635,7 +672,7 @@ mod tests { #[test] fn test_immediate_child_and_descendant_selectors() { let sheet = generate_property_sheet( - "foo", + "foo.css", " f1 { color: red; @@ -733,10 +770,164 @@ mod tests { ); // no match + assert_eq!(*query_simple(&sheet, vec!["f1", "f3", "f4"]), props(&[])); + assert_eq!(*query_simple(&sheet, vec!["f1", "f2", "f5"]), props(&[])); + } + + #[test] + fn test_text_attribute() { + let sheet = generate_property_sheet( + "foo.css", + " + f1 { + color: red; + + &[text='^[A-Z]'] { + color: green; + } + + &[text='^[A-Z_]+$'] { + color: blue; + } + } + + f2[text='^[A-Z_]+$'] { + color: purple; + } + ", + ) + .unwrap(); + assert_eq!( - *query_simple(&sheet, vec!["f1", "f3", "f4"]), + *query(&sheet, vec![("f1", true, 0)], "abc"), + props(&[("color", "red")]) + ); + assert_eq!( + *query(&sheet, vec![("f1", true, 0)], "Abc"), + props(&[("color", "green")]) + ); + assert_eq!( + *query(&sheet, vec![("f1", true, 0)], "AB_CD"), + props(&[("color", "blue")]) + ); + assert_eq!(*query(&sheet, vec![("f2", true, 0)], "Abc"), props(&[])); + assert_eq!( + *query(&sheet, vec![("f2", true, 0)], "ABC"), + props(&[("color", "purple")]) + ); + } + + #[test] + fn test_cascade_ordering_as_tie_breaker() { + let sheet = generate_property_sheet( + "foo.css", + " + f1 f2:nth-child(1) { color: red; } + f1:nth-child(1) f2 { color: green; } + f1 f2[text='a'] { color: blue; } + f1 f2[text='b'] { color: violet; } + ", + ) + .unwrap(); + + assert_eq!( + *query(&sheet, vec![("f1", true, 0), ("f2", true, 0)], "x"), props(&[]) ); + assert_eq!( + *query(&sheet, vec![("f1", true, 0), ("f2", true, 1)], "x"), + props(&[("color", "red")]) 
+ ); + assert_eq!( + *query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "x"), + props(&[("color", "green")]) + ); + assert_eq!( + *query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "a"), + props(&[("color", "blue")]) + ); + assert_eq!( + *query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "ab"), + props(&[("color", "violet")]) + ); + } + + #[test] + fn test_css_function_calls() { + let sheet = generate_property_sheet( + "foo.css", + " + a { + b: f(); + c: f(g(h), i, \"j\", 10); + } + ", + ) + .unwrap(); + + let p = query_simple(&sheet, vec!["a"]); + + assert_eq!( + p["b"], + object(&[("name", string("f")), ("args", array(vec![])),]) + ); + + assert_eq!( + p["c"], + object(&[ + ("name", string("f")), + ( + "args", + array(vec![ + object(&[("name", string("g")), ("args", array(vec![string("h"),]))]), + string("i"), + string("j"), + string("10"), + ]) + ), + ]) + ); + } + + #[test] + fn test_array_by_declaring_property_multiple_times() { + let sheet = generate_property_sheet( + "foo.css", + " + a { + b: 'foo'; + b: 'bar'; + b: 'baz'; + c: f(g()); + c: h(); + } + ", + ) + .unwrap(); + + let p = query_simple(&sheet, vec!["a"]); + + assert_eq!( + p["b"], + array(vec![string("foo"), string("bar"), string("baz"),]) + ); + + assert_eq!( + p["c"], + array(vec![ + object(&[ + ("name", string("f")), + ( + "args", + array(vec![object(&[ + ("name", string("g")), + ("args", array(vec![])), + ])]) + ) + ]), + object(&[("name", string("h")), ("args", array(vec![])),]), + ]), + ); } fn query_simple<'a>( @@ -775,6 +966,22 @@ mod tests { &sheet.property_sets[sheet.states[state_id].property_set_id] } + fn array(s: Vec) -> PropertyValue { + PropertyValue::Array(s) + } + + fn object<'a>(s: &'a [(&'a str, PropertyValue)]) -> PropertyValue { + PropertyValue::Object( + s.into_iter() + .map(|(a, b)| (a.to_string(), b.clone())) + .collect(), + ) + } + + fn string(s: &str) -> PropertyValue { + PropertyValue::String(s.to_string()) + } + fn props<'a>(s: &'a [(&'a str, &'a str)]) -> 
PropertySet { s.into_iter() .map(|(a, b)| (a.to_string(), PropertyValue::String(b.to_string()))) From 0f2347b3189503c768fc070a373d33407f9c4db6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 15:22:39 -0800 Subject: [PATCH 117/208] Just call the C lib 'the library' everywhere, don't call it a 'runtime' --- lib/include/tree_sitter/{runtime.h => api.h} | 6 ++-- lib/include/tree_sitter/compiler.h | 38 -------------------- lib/include/tree_sitter/parser.h | 2 +- lib/src/alloc.h | 6 ++-- lib/src/array.h | 6 ++-- lib/src/atomic.h | 6 ++-- lib/src/error_costs.h | 4 +-- lib/src/get_changed_ranges.h | 6 ++-- lib/src/language.h | 6 ++-- lib/src/length.h | 6 ++-- lib/src/lexer.h | 8 ++--- lib/src/{runtime.c => lib.c} | 0 lib/src/parser.c | 2 +- lib/src/point.h | 6 ++-- lib/src/reduce_action.h | 8 ++--- lib/src/stack.h | 6 ++-- lib/src/subtree.h | 8 ++--- lib/src/tree.c | 2 +- lib/src/tree.h | 6 ++-- lib/src/tree_cursor.c | 2 +- lib/src/tree_cursor.h | 6 ++-- lib/src/utf16.h | 6 ++-- script/bindgen.sh | 2 +- script/{build-runtime => build-lib} | 2 +- test/fuzz/fuzzer.cc | 2 +- 25 files changed, 57 insertions(+), 95 deletions(-) rename lib/include/tree_sitter/{runtime.h => api.h} (98%) delete mode 100644 lib/include/tree_sitter/compiler.h rename lib/src/{runtime.c => lib.c} (100%) rename script/{build-runtime => build-lib} (93%) diff --git a/lib/include/tree_sitter/runtime.h b/lib/include/tree_sitter/api.h similarity index 98% rename from lib/include/tree_sitter/runtime.h rename to lib/include/tree_sitter/api.h index ab69a0b5..16841c8e 100644 --- a/lib/include/tree_sitter/runtime.h +++ b/lib/include/tree_sitter/api.h @@ -1,5 +1,5 @@ -#ifndef TREE_SITTER_RUNTIME_H_ -#define TREE_SITTER_RUNTIME_H_ +#ifndef TREE_SITTER_API_H_ +#define TREE_SITTER_API_H_ #ifdef __cplusplus extern "C" { @@ -153,4 +153,4 @@ uint32_t ts_language_version(const TSLanguage *); } #endif -#endif // TREE_SITTER_RUNTIME_H_ +#endif // TREE_SITTER_API_H_ diff --git 
a/lib/include/tree_sitter/compiler.h b/lib/include/tree_sitter/compiler.h deleted file mode 100644 index a84d8a75..00000000 --- a/lib/include/tree_sitter/compiler.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef TREE_SITTER_COMPILER_H_ -#define TREE_SITTER_COMPILER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -typedef enum { - TSCompileErrorTypeNone, - TSCompileErrorTypeInvalidGrammar, - TSCompileErrorTypeInvalidRegex, - TSCompileErrorTypeUndefinedSymbol, - TSCompileErrorTypeInvalidExtraToken, - TSCompileErrorTypeInvalidExternalToken, - TSCompileErrorTypeLexConflict, - TSCompileErrorTypeParseConflict, - TSCompileErrorTypeEpsilonRule, - TSCompileErrorTypeInvalidTokenContents, - TSCompileErrorTypeInvalidRuleName, - TSCompileErrorTypeInvalidWordRule, -} TSCompileErrorType; - -typedef struct { - char *code; - char *error_message; - TSCompileErrorType error_type; -} TSCompileResult; - -TSCompileResult ts_compile_grammar(const char *input, FILE *log_file); -TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file); - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_COMPILER_H_ diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index a757eac0..e5037062 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -13,7 +13,7 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 -#ifndef TREE_SITTER_RUNTIME_H_ +#ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef struct TSLanguage TSLanguage; #endif diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 6f3fa347..8e027a99 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_ALLOC_H_ -#define RUNTIME_ALLOC_H_ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ #ifdef __cplusplus extern "C" { @@ -82,4 +82,4 @@ static inline void ts_free(void *buffer) { } #endif -#endif // RUNTIME_ALLOC_H_ +#endif // TREE_SITTER_ALLOC_H_ diff --git a/lib/src/array.h 
b/lib/src/array.h index 3f5b6b29..04565f33 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_ARRAY_H_ -#define RUNTIME_ARRAY_H_ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ #ifdef __cplusplus extern "C" { @@ -132,4 +132,4 @@ static inline void array__splice(VoidArray *self, size_t element_size, } #endif -#endif // RUNTIME_ARRAY_H_ +#endif // TREE_SITTER_ARRAY_H_ diff --git a/lib/src/atomic.h b/lib/src/atomic.h index d1ab1f23..78a4d7d8 100644 --- a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_ATOMIC_H_ -#define RUNTIME_ATOMIC_H_ +#ifndef TREE_SITTER_ATOMIC_H_ +#define TREE_SITTER_ATOMIC_H_ #include @@ -27,4 +27,4 @@ static inline uint32_t atomic_dec(volatile uint32_t *p) { #endif -#endif // RUNTIME_ATOMIC_H_ +#endif // TREE_SITTER_ATOMIC_H_ diff --git a/lib/src/error_costs.h b/lib/src/error_costs.h index d6420488..32d3666a 100644 --- a/lib/src/error_costs.h +++ b/lib/src/error_costs.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_ERROR_COSTS_H_ -#define RUNTIME_ERROR_COSTS_H_ +#ifndef TREE_SITTER_ERROR_COSTS_H_ +#define TREE_SITTER_ERROR_COSTS_H_ #define ERROR_STATE 0 #define ERROR_COST_PER_RECOVERY 500 diff --git a/lib/src/get_changed_ranges.h b/lib/src/get_changed_ranges.h index 2764b55f..a1f1dbb4 100644 --- a/lib/src/get_changed_ranges.h +++ b/lib/src/get_changed_ranges.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_GET_CHANGED_RANGES_H_ -#define RUNTIME_GET_CHANGED_RANGES_H_ +#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ +#define TREE_SITTER_GET_CHANGED_RANGES_H_ #ifdef __cplusplus extern "C" { @@ -33,4 +33,4 @@ unsigned ts_subtree_get_changed_ranges( } #endif -#endif // RUNTIME_GET_CHANGED_RANGES_H_ +#endif // TREE_SITTER_GET_CHANGED_RANGES_H_ diff --git a/lib/src/language.h b/lib/src/language.h index c8e5e8a1..0a0f108f 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_LANGUAGE_H_ -#define RUNTIME_LANGUAGE_H_ +#ifndef TREE_SITTER_LANGUAGE_H_ +#define TREE_SITTER_LANGUAGE_H_ 
#ifdef __cplusplus extern "C" { @@ -91,4 +91,4 @@ ts_language_alias_sequence(const TSLanguage *self, unsigned id) { } #endif -#endif // RUNTIME_LANGUAGE_H_ +#endif // TREE_SITTER_LANGUAGE_H_ diff --git a/lib/src/length.h b/lib/src/length.h index db325f7a..ffe0c7f4 100644 --- a/lib/src/length.h +++ b/lib/src/length.h @@ -1,10 +1,10 @@ -#ifndef RUNTIME_LENGTH_H_ -#define RUNTIME_LENGTH_H_ +#ifndef TREE_SITTER_LENGTH_H_ +#define TREE_SITTER_LENGTH_H_ #include #include #include "./point.h" -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" typedef struct { uint32_t bytes; diff --git a/lib/src/lexer.h b/lib/src/lexer.h index 327350f6..f523d88f 100644 --- a/lib/src/lexer.h +++ b/lib/src/lexer.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_LEXER_H_ -#define RUNTIME_LEXER_H_ +#ifndef TREE_SITTER_LEXER_H_ +#define TREE_SITTER_LEXER_H_ #ifdef __cplusplus extern "C" { @@ -7,7 +7,7 @@ extern "C" { #include "./length.h" #include "./subtree.h" -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" #include "tree_sitter/parser.h" typedef struct { @@ -45,4 +45,4 @@ TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); } #endif -#endif // RUNTIME_LEXER_H_ +#endif // TREE_SITTER_LEXER_H_ diff --git a/lib/src/runtime.c b/lib/src/lib.c similarity index 100% rename from lib/src/runtime.c rename to lib/src/lib.c diff --git a/lib/src/parser.c b/lib/src/parser.c index a33dbc6f..c2ebfeeb 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -2,7 +2,7 @@ #include #include #include -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" #include "./subtree.h" #include "./lexer.h" #include "./length.h" diff --git a/lib/src/point.h b/lib/src/point.h index 0c4941d5..4d0aed18 100644 --- a/lib/src/point.h +++ b/lib/src/point.h @@ -1,7 +1,7 @@ -#ifndef RUNTIME_POINT_H_ -#define RUNTIME_POINT_H_ +#ifndef TREE_SITTER_POINT_H_ +#define TREE_SITTER_POINT_H_ -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" #define POINT_MAX ((TSPoint) {UINT32_MAX, 
UINT32_MAX}) diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 91835c39..5956fb5d 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -1,12 +1,12 @@ -#ifndef RUNTIME_REDUCE_ACTION_H_ -#define RUNTIME_REDUCE_ACTION_H_ +#ifndef TREE_SITTER_REDUCE_ACTION_H_ +#define TREE_SITTER_REDUCE_ACTION_H_ #ifdef __cplusplus extern "C" { #endif #include "./array.h" -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" typedef struct { uint32_t count; @@ -31,4 +31,4 @@ static inline void ts_reduce_action_set_add(ReduceActionSet *self, } #endif -#endif // RUNTIME_REDUCE_ACTION_H_ +#endif // TREE_SITTER_REDUCE_ACTION_H_ diff --git a/lib/src/stack.h b/lib/src/stack.h index d476d763..1ccd98cd 100644 --- a/lib/src/stack.h +++ b/lib/src/stack.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_PARSE_STACK_H_ -#define RUNTIME_PARSE_STACK_H_ +#ifndef TREE_SITTER_PARSE_STACK_H_ +#define TREE_SITTER_PARSE_STACK_H_ #ifdef __cplusplus extern "C" { @@ -132,4 +132,4 @@ void ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *); } #endif -#endif // RUNTIME_PARSE_STACK_H_ +#endif // TREE_SITTER_PARSE_STACK_H_ diff --git a/lib/src/subtree.h b/lib/src/subtree.h index cc5c79aa..039494b5 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_SUBTREE_H_ -#define RUNTIME_SUBTREE_H_ +#ifndef TREE_SITTER_SUBTREE_H_ +#define TREE_SITTER_SUBTREE_H_ #ifdef __cplusplus extern "C" { @@ -10,7 +10,7 @@ extern "C" { #include "./length.h" #include "./array.h" #include "./error_costs.h" -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" #include "tree_sitter/parser.h" extern TSStateId TS_TREE_STATE_NONE; @@ -285,4 +285,4 @@ static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { } #endif -#endif // RUNTIME_SUBTREE_H_ +#endif // TREE_SITTER_SUBTREE_H_ diff --git a/lib/src/tree.c b/lib/src/tree.c index b729c8c7..9f294412 100644 --- a/lib/src/tree.c +++ b/lib/src/tree.c @@ -1,4 +1,4 @@ -#include 
"tree_sitter/runtime.h" +#include "tree_sitter/api.h" #include "./array.h" #include "./get_changed_ranges.h" #include "./subtree.h" diff --git a/lib/src/tree.h b/lib/src/tree.h index dd4f3184..92a7e641 100644 --- a/lib/src/tree.h +++ b/lib/src/tree.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_TREE_H_ -#define RUNTIME_TREE_H_ +#ifndef TREE_SITTER_TREE_H_ +#define TREE_SITTER_TREE_H_ #ifdef __cplusplus extern "C" { @@ -31,4 +31,4 @@ void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *); } #endif -#endif // RUNTIME_TREE_H_ +#endif // TREE_SITTER_TREE_H_ diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index d352c32b..5ccf4501 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -1,4 +1,4 @@ -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" #include "./alloc.h" #include "./tree_cursor.h" #include "./language.h" diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 6e46b7dd..55bdad86 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_TREE_CURSOR_H_ -#define RUNTIME_TREE_CURSOR_H_ +#ifndef TREE_SITTER_TREE_CURSOR_H_ +#define TREE_SITTER_TREE_CURSOR_H_ #include "./subtree.h" @@ -17,4 +17,4 @@ typedef struct { void ts_tree_cursor_init(TreeCursor *, TSNode); -#endif // RUNTIME_TREE_CURSOR_H_ +#endif // TREE_SITTER_TREE_CURSOR_H_ diff --git a/lib/src/utf16.h b/lib/src/utf16.h index 0cf69218..32fd05e6 100644 --- a/lib/src/utf16.h +++ b/lib/src/utf16.h @@ -1,5 +1,5 @@ -#ifndef RUNTIME_UTF16_H_ -#define RUNTIME_UTF16_H_ +#ifndef TREE_SITTER_UTF16_H_ +#define TREE_SITTER_UTF16_H_ #ifdef __cplusplus extern "C" { @@ -18,4 +18,4 @@ utf8proc_ssize_t utf16_iterate(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8p } #endif -#endif // RUNTIME_UTF16_H_ +#endif // TREE_SITTER_UTF16_H_ diff --git a/script/bindgen.sh b/script/bindgen.sh index 0a536d20..f9299095 100755 --- a/script/bindgen.sh +++ b/script/bindgen.sh @@ -1,7 +1,7 @@ #!/bin/bash output_path=lib/binding/bindings.rs 
-header_path='lib/include/tree_sitter/runtime.h' +header_path='lib/include/tree_sitter/api.h' bindgen \ --no-layout-tests \ diff --git a/script/build-runtime b/script/build-lib similarity index 93% rename from script/build-runtime rename to script/build-lib index 7b2e99f2..b81a4b0a 100755 --- a/script/build-runtime +++ b/script/build-lib @@ -14,7 +14,7 @@ ${CC} \ -I lib/src \ -I lib/include \ -I lib/utf8proc \ - lib/src/runtime.c \ + lib/src/lib.c \ -o tree-sitter.o ar rcs libtree-sitter.a tree-sitter.o diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc index 2ed7683f..8d6f9cef 100644 --- a/test/fuzz/fuzzer.cc +++ b/test/fuzz/fuzzer.cc @@ -1,5 +1,5 @@ #include -#include "tree_sitter/runtime.h" +#include "tree_sitter/api.h" extern "C" const TSLanguage *TS_LANG(); From fab4673c14ba5fc066bf13e3408802be9d7b5657 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 15:23:31 -0800 Subject: [PATCH 118/208] Get fixture grammars building for library unit tests --- .appveyor.yml | 2 +- .travis.yml | 2 +- lib/build.rs | 63 ++++++++++++++++++++++++++++------ script/fetch-fixtures | 15 ++++---- script/fetch-fixtures.cmd | 15 ++++---- script/fetch-test-fixtures.cmd | 16 --------- script/fetch-test-fixtures.sh | 14 -------- 7 files changed, 70 insertions(+), 57 deletions(-) delete mode 100755 script/fetch-test-fixtures.cmd delete mode 100755 script/fetch-test-fixtures.sh diff --git a/.appveyor.yml b/.appveyor.yml index 7dccb660..934d6f51 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -12,7 +12,7 @@ install: - rustc -vV - cargo -vV - - script\fetch-test-fixtures.cmd + - script\fetch-fixtures.cmd test_script: - cargo build diff --git a/.travis.yml b/.travis.yml index 65c021cf..45bc26e3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ env: - TREE_SITTER_TEST=1 before_install: - - ./script/fetch-test-fixtures.sh + - ./script/fetch-fixtures branches: only: diff --git a/lib/build.rs b/lib/build.rs index cee131bd..f8c19f05 100644 --- a/lib/build.rs +++ 
b/lib/build.rs @@ -1,26 +1,67 @@ extern crate cc; use std::env; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; +use std::fs; fn main() { let mut config = cc::Build::new(); - let src_path: PathBuf = ["src"].iter().collect(); - config .define("UTF8PROC_STATIC", "") .flag_if_supported("-std=c99") .flag_if_supported("-Wno-unused-parameter") .include("include") .include("utf8proc") - .file(src_path.join("runtime.c")); + .file(Path::new("src").join("lib.c")) + .compile("tree-sitter"); - if env::var("RUST_TREE_SITTER_TEST").is_ok() { - let parser_dir: PathBuf = ["fixtures", "tree-sitter-rust", "src"].iter().collect(); - config - .file(parser_dir.join("parser.c")) - .file(parser_dir.join("scanner.c")); + if env::var("TREE_SITTER_TEST").is_ok() { + let mut parser_config = cc::Build::new(); + parser_config + .opt_level(0) + .flag_if_supported("-Wno-unused-parameter"); + + let mut scanner_c_config = cc::Build::new(); + scanner_c_config + .flag_if_supported("-std=c99") + .flag_if_supported("-Wno-unused-parameter"); + + let mut scanner_cxx_config = cc::Build::new(); + scanner_cxx_config + .cpp(true) + .flag_if_supported("-Wno-unused-parameter"); + + let grammars_dir: PathBuf = ["..", "test", "fixtures", "grammars"].iter().collect(); + for entry in fs::read_dir(&grammars_dir).expect("Failed to list grammar directory") { + let entry = entry.expect("Failed to load grammars directory entry"); + if !entry.path().is_dir() { + continue; + } + let parser_dir_path = entry.path(); + let parser_src_path = parser_dir_path.join("src"); + let parser_c_path = parser_src_path.join("parser.c"); + let scanner_c_path = parser_src_path.join("scanner.c"); + let scanner_cc_path = parser_src_path.join("scanner.cc"); + + println!("cargo:rerun-if-changed={}", parser_c_path.to_str().unwrap()); + parser_config + .include(&parser_src_path) + .file(&parser_c_path); + if scanner_cc_path.exists() { + println!("cargo:rerun-if-changed={}", scanner_cc_path.to_str().unwrap()); + scanner_cxx_config + 
.include(&parser_src_path) + .file(&scanner_cc_path); + } else if scanner_c_path.exists() { + println!("cargo:rerun-if-changed={}", scanner_c_path.to_str().unwrap()); + scanner_c_config + .include(&parser_src_path) + .file(&scanner_c_path); + } + } + + parser_config.compile("fixture-parsers"); + scanner_c_config.compile("fixture-scanners-c"); + scanner_cxx_config.compile("fixture-scanners-cxx"); } - - config.compile("tree-sitter-runtime"); } diff --git a/script/fetch-fixtures b/script/fetch-fixtures index a298a4d0..94f9eddd 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -21,14 +21,15 @@ fetch_grammar() { ) } -fetch_grammar embedded-template master -fetch_grammar javascript master -fetch_grammar json master +fetch_grammar bash master fetch_grammar c master fetch_grammar cpp master -fetch_grammar python master +fetch_grammar embedded-template master fetch_grammar go master -fetch_grammar ruby master -fetch_grammar typescript master -fetch_grammar bash master fetch_grammar html master +fetch_grammar javascript master +fetch_grammar json master +fetch_grammar python master +fetch_grammar ruby master +fetch_grammar rust master +fetch_grammar typescript master diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 0e65b0a0..98d5d578 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -1,16 +1,17 @@ @echo off -call:fetch_grammar embedded-template master -call:fetch_grammar javascript master -call:fetch_grammar json master +call:fetch_grammar bash master call:fetch_grammar c master call:fetch_grammar cpp master -call:fetch_grammar python master +call:fetch_grammar embedded-template master call:fetch_grammar go master -call:fetch_grammar ruby master -call:fetch_grammar typescript master -call:fetch_grammar bash master call:fetch_grammar html master +call:fetch_grammar javascript master +call:fetch_grammar json master +call:fetch_grammar python master +call:fetch_grammar ruby master +call:fetch_grammar rust master 
+call:fetch_grammar typescript master EXIT /B 0 :fetch_grammar diff --git a/script/fetch-test-fixtures.cmd b/script/fetch-test-fixtures.cmd deleted file mode 100755 index 33543961..00000000 --- a/script/fetch-test-fixtures.cmd +++ /dev/null @@ -1,16 +0,0 @@ -@Echo off -SETLOCAL - -Set grammar_dir=fixtures\tree-sitter-rust -Set grammar_url=https://github.com/tree-sitter/tree-sitter-rust - -@IF NOT EXIST %grammar_dir% ( - git clone %grammar_url% %grammar_dir% --depth=1 -) - -pushd %grammar_dir% -git fetch origin master --depth=1 -git reset --hard origin/master -popd - -ENDLOCAL diff --git a/script/fetch-test-fixtures.sh b/script/fetch-test-fixtures.sh deleted file mode 100755 index 24cc316a..00000000 --- a/script/fetch-test-fixtures.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -grammar_dir='fixtures/tree-sitter-rust' -grammar_url='https://github.com/tree-sitter/tree-sitter-rust' - -if [ ! -d $grammar_dir ]; then - git clone $grammar_url $grammar_dir --depth=1 -fi - -( - cd $grammar_dir; - git fetch origin master --depth=1 - git reset --hard origin/master; -) From 99531d757cc8b5c839a4d1605a438fc334a68bc2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 15:50:34 -0800 Subject: [PATCH 119/208] Configure caching on travis and appveyor --- .appveyor.yml | 3 ++- .travis.yml | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 934d6f51..1d9fb179 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -23,5 +23,6 @@ branches: - master cache: - - test\fixtures + - target + - test\fixtures\grammars - C:\Users\appveyor\.cargo diff --git a/.travis.yml b/.travis.yml index 45bc26e3..47b88e81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,3 +12,8 @@ before_install: branches: only: - master + +cache: + cargo: true + directories: + - test/fixtures/grammars From ae6dbb945b8f2c2b3fc3159dcd1a453457aa63fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 15:50:54 -0800 Subject: [PATCH 120/208] 
Avoid using unix-specific methods on windows --- cli/src/util.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/cli/src/util.rs b/cli/src/util.rs index d7d8572e..f36cbe79 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -4,8 +4,23 @@ use std::process::{Child, ChildStdin, Command, Stdio}; use std::str; use tree_sitter::Parser; +#[cfg(windows)] +pub(crate) struct LogSession(); + +#[cfg(windows)] +pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result { + Ok(LogSession()) +} + +#[cfg(windows)] +pub(crate) fn stop_logging_graphs(parser: &mut Parser, mut session: LogSession) -> Result<()> { + Ok(()) +} + +#[cfg(unix)] pub(crate) struct LogSession(Child, ChildStdin); +#[cfg(unix)] pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result { let mut dot_file = File::create(path)?; dot_file.write(b"\n\n\n")?; @@ -19,13 +34,20 @@ pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result Result<()> { drop(session.1); - parser.stop_printing_dot_graphs(); + + if cfg!(unix) { + parser.stop_printing_dot_graphs(); + } + session.0.wait()?; if cfg!(target_os = "macos") { From 272046a2506ffba2676a38d5da557f471a44b827 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Jan 2019 17:11:57 -0800 Subject: [PATCH 121/208] Reorganize tests - move them all into the CLI crate --- .../prepare_grammar/intern_symbols.rs | 2 +- .../prepare_grammar/process_inlines.rs | 2 +- cli/src/generate/properties.rs | 4 +- cli/src/main.rs | 3 + cli/src/tests/corpuses.rs | 57 ++ cli/src/tests/languages.rs | 19 + cli/src/tests/mod.rs | 3 + cli/src/tests/parser_api.rs | 507 +++++++++++++++++ lib/binding/lib.rs | 519 +----------------- lib/build.rs | 1 + 10 files changed, 595 insertions(+), 522 deletions(-) create mode 100644 cli/src/tests/corpuses.rs create mode 100644 cli/src/tests/languages.rs create mode 100644 cli/src/tests/mod.rs create mode 100644 cli/src/tests/parser_api.rs diff --git 
a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index a466935b..a7248817 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -219,7 +219,7 @@ mod tests { let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))])); match result { - Err(Error(message)) => assert_eq!(message, "Undefined symbol 'y'"), + Err(Error(message)) => assert_eq!(message, "Undefined symbol `y`"), _ => panic!("Expected an error but got none"), } } diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 3c0f529a..f58de63d 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -408,7 +408,7 @@ mod tests { ProductionStep::new(Symbol::terminal(11)) .with_prec(2, None) .with_alias("inner_alias", true), - ProductionStep::new(Symbol::terminal(12)).with_prec(3, None), + ProductionStep::new(Symbol::terminal(12)), ], }], }, diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index 9e570a99..e3b60185 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -118,7 +118,7 @@ impl Builder { self.remove_duplicate_states(); for (i, state) in self.output.states.iter_mut().enumerate() { - state.id = i; + state.id = Some(i); } self.output @@ -130,7 +130,7 @@ impl Builder { Entry::Vacant(v) => { let state_id = self.output.states.len(); self.output.states.push(PropertyStateJSON { - id: 0, + id: None, transitions: Vec::new(), property_set_id: 0, default_next_state_id: 0, diff --git a/cli/src/main.rs b/cli/src/main.rs index 334f06ef..9f095668 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -17,6 +17,9 @@ mod parse; mod test; mod util; +#[cfg(test)] +mod tests; + use self::loader::Loader; use clap::{App, Arg, SubCommand}; use std::env; diff --git 
a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs new file mode 100644 index 00000000..b70bb371 --- /dev/null +++ b/cli/src/tests/corpuses.rs @@ -0,0 +1,57 @@ +use super::languages; +use crate::test::{parse_tests, TestEntry}; +use std::path::PathBuf; +use tree_sitter::{Language, Parser}; + +lazy_static! { + static ref LANGUAGES: [(&'static str, Language); 6] = [ + ("c", languages::c()), + ("cpp", languages::cpp()), + ("embedded-template", languages::embedded_template()), + ("go", languages::go()), + ("html", languages::html()), + ("javascript", languages::javascript()), + ]; +} + +#[test] +fn test_corpus_files() { + let mut parser = Parser::new(); + let grammars_dir: PathBuf = [ + env!("CARGO_MANIFEST_DIR"), + "..", + "test", + "fixtures", + "grammars", + ] + .iter() + .collect(); + + for (name, language) in LANGUAGES.iter().cloned() { + let corpus_dir = grammars_dir.join(name).join("corpus"); + let test = parse_tests(&corpus_dir).unwrap(); + parser.set_language(language).unwrap(); + run_mutation_tests(&mut parser, test); + } +} + +fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { + match test { + TestEntry::Example { + name, + input, + output, + } => { + let tree = parser + .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) + .unwrap(); + let actual = tree.root_node().to_sexp(); + assert_eq!(actual, output); + } + TestEntry::Group { name, children } => { + for child in children { + run_mutation_tests(parser, child); + } + } + } +} diff --git a/cli/src/tests/languages.rs b/cli/src/tests/languages.rs new file mode 100644 index 00000000..0c483d08 --- /dev/null +++ b/cli/src/tests/languages.rs @@ -0,0 +1,19 @@ +use tree_sitter::Language; + +extern "C" { + fn tree_sitter_c() -> Language; + fn tree_sitter_cpp() -> Language; + fn tree_sitter_embedded_template() -> Language; + fn tree_sitter_go() -> Language; + fn tree_sitter_html() -> Language; + fn tree_sitter_javascript() -> Language; + fn tree_sitter_rust() -> Language; +} + +pub fn c() 
-> Language { unsafe { tree_sitter_c() } } +pub fn cpp() -> Language { unsafe { tree_sitter_cpp() } } +pub fn embedded_template() -> Language { unsafe { tree_sitter_embedded_template() } } +pub fn go() -> Language { unsafe { tree_sitter_go() } } +pub fn html() -> Language { unsafe { tree_sitter_html() } } +pub fn javascript() -> Language { unsafe { tree_sitter_javascript() } } +pub fn rust() -> Language { unsafe { tree_sitter_rust() } } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs new file mode 100644 index 00000000..bc199616 --- /dev/null +++ b/cli/src/tests/mod.rs @@ -0,0 +1,3 @@ +mod languages; +mod corpuses; +mod parser_api; diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api.rs new file mode 100644 index 00000000..af5ba71f --- /dev/null +++ b/cli/src/tests/parser_api.rs @@ -0,0 +1,507 @@ +use super::languages::rust; +use std::thread; +use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet, Range}; + +#[test] +fn test_basic_parsing() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let tree = parser + .parse_str( + " + struct Stuff {} + fn main() {} + ", + None, + ) + .unwrap(); + + let root_node = tree.root_node(); + assert_eq!(root_node.kind(), "source_file"); + + assert_eq!( + root_node.to_sexp(), + "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" + ); + + let struct_node = root_node.child(0).unwrap(); + assert_eq!(struct_node.kind(), "struct_item"); +} + +#[test] +fn test_logging() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let mut messages = Vec::new(); + parser.set_logger(Some(Box::new(|log_type, message| { + messages.push((log_type, message.to_string())); + }))); + + parser + .parse_str( + " + struct Stuff {} + fn main() {} + ", + None, + ) + .unwrap(); + + assert!(messages.contains(&( + LogType::Parse, + "reduce sym:struct_item, child_count:3".to_string() + ))); + 
assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); +} + +#[test] +fn test_tree_cursor() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let tree = parser + .parse_str( + " + struct Stuff { + a: A; + b: Option, + } + ", + None, + ) + .unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct_item"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct"); + assert_eq!(cursor.node().is_named(), false); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration_list"); + assert_eq!(cursor.node().is_named(), true); +} + +#[test] +fn test_tree_property_matching() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let source_code = "fn f1() { f2(); }"; + let tree = parser.parse_str(source_code, None).unwrap(); + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + reference: Option, + define: Option, + } + + let empty_properties = Properties { + reference: None, + define: None, + }; + + let property_sheet = PropertySheet::::new( + rust(), + r##" + { + "states": [ + { + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1}, + {"type": "function_item", "named": true, "state_id": 2} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [ + {"type": "identifier", "named": true, "state_id": 4} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], 
+ "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"reference": "function"}, + {"define": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "function_item"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "fn"); + assert_eq!(*cursor.node_properties(), empty_properties); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(cursor.node_properties().define, Some("function".to_owned())); + assert!(!cursor.goto_first_child()); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "parameters"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "("); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), ")"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_parent()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!( + cursor.node_properties().reference, + Some("function".to_owned()) + ); +} + +#[test] +fn test_tree_property_matching_with_regexes() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let source_code = "fn f1() { None(a()) 
}"; + let tree = parser.parse_str(source_code, None).unwrap(); + + #[derive(Debug, Deserialize, PartialEq, Eq)] + struct Properties { + scope: Option, + } + + let empty_properties = Properties { scope: None }; + + let property_sheet = PropertySheet::::new( + rust(), + r##" + { + "states": [ + { + "id": 0, + "transitions": [ + {"type": "call_expression", "named": true, "state_id": 1} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "id": 1, + "transitions": [ + {"type": "identifier", "named": true, "text": "^[A-Z]", "state_id": 2}, + {"type": "identifier", "named": true, "state_id": 3} + ], + "default_next_state_id": 0, + "property_set_id": 0 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 1 + }, + { + "transitions": [], + "default_next_state_id": 0, + "property_set_id": 2 + } + ], + "property_sets": [ + {}, + {"scope": "constructor"}, + {"scope": "function"} + ] + } + "##, + ) + .unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "source_file"); + assert_eq!(*cursor.node_properties(), empty_properties); + + cursor.goto_first_child(); + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + assert_eq!(*cursor.node_properties(), empty_properties); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!( + cursor.node_properties().scope, + Some("constructor".to_owned()) + ); +} + +#[test] +fn test_custom_utf8_input() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let lines = &["pub fn foo() {", " 1", "}"]; + + let tree = parser + 
.parse_utf8( + &mut |_, position| { + let row = position.row; + let column = position.column; + if row < lines.len() { + if column < lines[row].as_bytes().len() { + &lines[row].as_bytes()[column..] + } else { + "\n".as_bytes() + } + } else { + &[] + } + }, + None, + ) + .unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); +} + +#[test] +fn test_custom_utf16_input() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + parser.set_logger(Some(Box::new(|t, message| { + println!("log: {:?} {}", t, message); + }))); + + let lines: Vec> = ["pub fn foo() {", " 1", "}"] + .iter() + .map(|s| s.encode_utf16().collect()) + .collect(); + + let tree = parser + .parse_utf16( + &mut |_, position| { + let row = position.row; + let column = position.column; + if row < lines.len() { + if column < lines[row].len() { + &lines[row][column..] 
+ } else { + &[10] + } + } else { + &[] + } + }, + None, + ) + .unwrap(); + + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); + assert_eq!(root.kind(), "source_file"); + assert_eq!(root.has_error(), false); + assert_eq!(root.child(0).unwrap().kind(), "function_item"); +} + +#[test] +fn test_node_equality() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str("struct A {}", None).unwrap(); + let node1 = tree.root_node(); + let node2 = tree.root_node(); + assert_eq!(node1, node2); + assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); + assert_ne!(node1.child(0).unwrap(), node2); +} + +#[test] +fn test_editing() { + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + + let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); + let mut input_bytes_read = Vec::new(); + + let mut tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + None, + ) + .unwrap(); + + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + input_bytes_read.clear(); + input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); + tree.edit(&InputEdit { + start_byte: 14, + old_end_byte: 14, + new_end_byte: 20, + start_position: Point::new(0, 14), + old_end_position: Point::new(0, 14), + new_end_position: Point::new(0, 20), + }); + + let tree = parser + .parse_utf8( + &mut |offset, _| { + let offset = offset; + if offset < input_bytes.len() { + let result = &input_bytes[offset..offset + 1]; + 
input_bytes_read.extend(result.iter()); + result + } else { + &[] + } + }, + Some(&tree), + ) + .unwrap(); + + let parameters_sexp = tree + .root_node() + .named_child(0) + .unwrap() + .named_child(1) + .unwrap() + .to_sexp(); + assert_eq!( + parameters_sexp, + "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + ); + + let retokenized_content = String::from_utf8(input_bytes_read).unwrap(); + assert!(retokenized_content.contains("b: B")); + assert!(!retokenized_content.contains("a: A")); + assert!(!retokenized_content.contains("c: C")); + assert!(!retokenized_content.contains("{}")); +} + +#[test] +fn test_parallel_parsing() { + // Parse this source file so that each thread has a non-trivial amount of + // work to do. + let this_file_source = include_str!("parser_api.rs"); + + let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + let tree = parser.parse_str(this_file_source, None).unwrap(); + + let mut parse_threads = Vec::new(); + for thread_id in 1..5 { + let mut tree_clone = tree.clone(); + parse_threads.push(thread::spawn(move || { + // For each thread, prepend a different number of declarations to the + // source code. + let mut prepend_line_count = 0; + let mut prepended_source = String::new(); + for _ in 0..thread_id { + prepend_line_count += 2; + prepended_source += "struct X {}\n\n"; + } + + tree_clone.edit(&InputEdit { + start_byte: 0, + old_end_byte: 0, + new_end_byte: prepended_source.len(), + start_position: Point::new(0, 0), + old_end_position: Point::new(0, 0), + new_end_position: Point::new(prepend_line_count, 0), + }); + prepended_source += this_file_source; + + // Reparse using the old tree as a starting point. 
+ let mut parser = Parser::new(); + parser.set_language(rust()).unwrap(); + parser + .parse_str(&prepended_source, Some(&tree_clone)) + .unwrap() + })); + } + + // Check that the trees have the expected relationship to one another. + let trees = parse_threads + .into_iter() + .map(|thread| thread.join().unwrap()); + let child_count_differences = trees + .map(|t| t.root_node().child_count() - tree.root_node().child_count()) + .collect::>(); + + assert_eq!(child_count_differences, &[1, 2, 3, 4]); +} diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 88cc24be..08f863f8 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -94,7 +94,7 @@ pub struct PropertyTransitionJSON { #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct PropertyStateJSON { - pub id: usize, + pub id: Option, pub property_set_id: usize, pub transitions: Vec, pub default_next_state_id: usize, @@ -847,520 +847,3 @@ impl

PropertySheet

{ }) } } - -#[cfg(test)] -mod tests { - use super::*; - use std::thread; - - fn rust() -> Language { - unsafe { tree_sitter_rust() } - } - extern "C" { - fn tree_sitter_rust() -> Language; - } - - #[test] - fn test_basic_parsing() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let tree = parser - .parse_str( - " - struct Stuff {} - fn main() {} - ", - None, - ) - .unwrap(); - - let root_node = tree.root_node(); - assert_eq!(root_node.kind(), "source_file"); - - assert_eq!( - root_node.to_sexp(), - "(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))" - ); - - let struct_node = root_node.child(0).unwrap(); - assert_eq!(struct_node.kind(), "struct_item"); - } - - #[test] - fn test_logging() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let mut messages = Vec::new(); - parser.set_logger(Some(Box::new(|log_type, message| { - messages.push((log_type, message.to_string())); - }))); - - parser - .parse_str( - " - struct Stuff {} - fn main() {} - ", - None, - ) - .unwrap(); - - assert!(messages.contains(&( - LogType::Parse, - "reduce sym:struct_item, child_count:3".to_string() - ))); - assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); - } - - #[test] - fn test_tree_cursor() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let tree = parser - .parse_str( - " - struct Stuff { - a: A; - b: Option, - } - ", - None, - ) - .unwrap(); - - let mut cursor = tree.walk(); - assert_eq!(cursor.node().kind(), "source_file"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "struct_item"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "struct"); - assert_eq!(cursor.node().is_named(), false); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "type_identifier"); - assert_eq!(cursor.node().is_named(), true); - - 
assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "field_declaration_list"); - assert_eq!(cursor.node().is_named(), true); - } - - #[test] - fn test_tree_property_matching() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let source_code = "fn f1() { f2(); }"; - let tree = parser.parse_str(source_code, None).unwrap(); - - #[derive(Debug, Deserialize, PartialEq, Eq)] - struct Properties { - reference: Option, - define: Option, - } - - let empty_properties = Properties { - reference: None, - define: None, - }; - - let property_sheet = PropertySheet::::new( - rust(), - r##" - { - "states": [ - { - "transitions": [ - {"type": "call_expression", "named": true, "state_id": 1}, - {"type": "function_item", "named": true, "state_id": 2} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [ - {"type": "identifier", "named": true, "state_id": 3} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [ - {"type": "identifier", "named": true, "state_id": 4} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 1 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 2 - } - ], - "property_sets": [ - {}, - {"reference": "function"}, - {"define": "function"} - ] - } - "##, - ) - .unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code); - assert_eq!(cursor.node().kind(), "source_file"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function_item"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "fn"); - assert_eq!(*cursor.node_properties(), empty_properties); - assert!(!cursor.goto_first_child()); - - assert!(cursor.goto_next_sibling()); - 
assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties().define, Some("function".to_owned())); - assert!(!cursor.goto_first_child()); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "parameters"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), ")"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_parent()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "block"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - cursor.node_properties().reference, - Some("function".to_owned()) - ); - } - - #[test] - fn test_tree_property_matching_with_regexes() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let source_code = "fn f1() { None(a()) }"; - let tree = parser.parse_str(source_code, None).unwrap(); - - #[derive(Debug, Deserialize, PartialEq, Eq)] - struct Properties { - scope: Option, - } - - let empty_properties = Properties { scope: None }; - - let property_sheet = PropertySheet::::new( - rust(), - r##" - { - "states": [ - { - "id": 0, - "transitions": [ - {"type": "call_expression", "named": true, "state_id": 1} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "id": 1, - "transitions": [ - {"type": "identifier", "named": true, "text": "^[A-Z]", "state_id": 2}, - {"type": "identifier", "named": true, "state_id": 3} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [], - "default_next_state_id": 0, - 
"property_set_id": 1 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 2 - } - ], - "property_sets": [ - {}, - {"scope": "constructor"}, - {"scope": "function"} - ] - } - "##, - ) - .unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code); - assert_eq!(cursor.node().kind(), "source_file"); - assert_eq!(*cursor.node_properties(), empty_properties); - - cursor.goto_first_child(); - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert!(cursor.goto_next_sibling()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "block"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - cursor.node_properties().scope, - Some("constructor".to_owned()) - ); - } - - #[test] - fn test_custom_utf8_input() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let lines = &["pub fn foo() {", " 1", "}"]; - - let tree = parser - .parse_utf8( - &mut |_, position| { - let row = position.row; - let column = position.column; - if row < lines.len() { - if column < lines[row].as_bytes().len() { - &lines[row].as_bytes()[column..] 
- } else { - "\n".as_bytes() - } - } else { - &[] - } - }, - None, - ) - .unwrap(); - - let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); - assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); - assert_eq!(root.child(0).unwrap().kind(), "function_item"); - } - - #[test] - fn test_custom_utf16_input() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - parser.set_logger(Some(Box::new(|t, message| { - println!("log: {:?} {}", t, message); - }))); - - let lines: Vec> = ["pub fn foo() {", " 1", "}"] - .iter() - .map(|s| s.encode_utf16().collect()) - .collect(); - - let tree = parser - .parse_utf16( - &mut |_, position| { - let row = position.row; - let column = position.column; - if row < lines.len() { - if column < lines[row].len() { - &lines[row][column..] - } else { - &[10] - } - } else { - &[] - } - }, - None, - ) - .unwrap(); - - let root = tree.root_node(); - assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))"); - assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); - assert_eq!(root.child(0).unwrap().kind(), "function_item"); - } - - #[test] - fn test_node_equality() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let tree = parser.parse_str("struct A {}", None).unwrap(); - let node1 = tree.root_node(); - let node2 = tree.root_node(); - assert_eq!(node1, node2); - assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); - assert_ne!(node1.child(0).unwrap(), node2); - } - - #[test] - fn test_editing() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); - let mut input_bytes_read = Vec::new(); - - let mut tree = parser - .parse_utf8( - &mut |offset, _| { - let offset = 
offset; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, - None, - ) - .unwrap(); - - let parameters_sexp = tree - .root_node() - .named_child(0) - .unwrap() - .named_child(1) - .unwrap() - .to_sexp(); - assert_eq!( - parameters_sexp, - "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" - ); - - input_bytes_read.clear(); - input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); - tree.edit(&InputEdit { - start_byte: 14, - old_end_byte: 14, - new_end_byte: 20, - start_position: Point::new(0, 14), - old_end_position: Point::new(0, 14), - new_end_position: Point::new(0, 20), - }); - - let tree = parser - .parse_utf8( - &mut |offset, _| { - let offset = offset; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, - Some(&tree), - ) - .unwrap(); - - let parameters_sexp = tree - .root_node() - .named_child(0) - .unwrap() - .named_child(1) - .unwrap() - .to_sexp(); - assert_eq!( - parameters_sexp, - "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" - ); - - let retokenized_content = String::from_utf8(input_bytes_read).unwrap(); - assert!(retokenized_content.contains("b: B")); - assert!(!retokenized_content.contains("a: A")); - assert!(!retokenized_content.contains("c: C")); - assert!(!retokenized_content.contains("{}")); - } - - #[test] - fn test_parallel_parsing() { - // Parse this source file so that each thread has a non-trivial amount of - // work to do. 
- let this_file_source = include_str!("lib.rs"); - - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let tree = parser.parse_str(this_file_source, None).unwrap(); - - let mut parse_threads = Vec::new(); - for thread_id in 1..5 { - let mut tree_clone = tree.clone(); - parse_threads.push(thread::spawn(move || { - // For each thread, prepend a different number of declarations to the - // source code. - let mut prepend_line_count = 0; - let mut prepended_source = String::new(); - for _ in 0..thread_id { - prepend_line_count += 2; - prepended_source += "struct X {}\n\n"; - } - - tree_clone.edit(&InputEdit { - start_byte: 0, - old_end_byte: 0, - new_end_byte: prepended_source.len(), - start_position: Point::new(0, 0), - old_end_position: Point::new(0, 0), - new_end_position: Point::new(prepend_line_count, 0), - }); - prepended_source += this_file_source; - - // Reparse using the old tree as a starting point. - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - parser - .parse_str(&prepended_source, Some(&tree_clone)) - .unwrap() - })); - } - - // Check that the trees have the expected relationship to one another. 
- let trees = parse_threads - .into_iter() - .map(|thread| thread.join().unwrap()); - let child_count_differences = trees - .map(|t| t.root_node().child_count() - tree.root_node().child_count()) - .collect::>(); - - assert_eq!(child_count_differences, &[1, 2, 3, 4]); - } -} diff --git a/lib/build.rs b/lib/build.rs index f8c19f05..7e8714ef 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -46,6 +46,7 @@ fn main() { println!("cargo:rerun-if-changed={}", parser_c_path.to_str().unwrap()); parser_config .include(&parser_src_path) + .opt_level(0) .file(&parser_c_path); if scanner_cc_path.exists() { println!("cargo:rerun-if-changed={}", scanner_cc_path.to_str().unwrap()); From cffe80bfaddde02aa7ac1a6e4e9c698da00f9602 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 09:48:19 -0800 Subject: [PATCH 122/208] Fix tie-breaking via cascade ordering in property sheets --- cli/src/generate/properties.rs | 43 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index e3b60185..cca7fef8 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -157,7 +157,7 @@ impl Builder { } fn populate_state(&mut self, item_set: ItemSet, state_id: StateId) { - let mut transition_map: HashSet = HashSet::new(); + let mut transition_map: HashSet<(PropertyTransitionJSON, u32)> = HashSet::new(); let mut selector_matches = Vec::new(); // First, compute all of the possible state transition predicates for @@ -170,13 +170,18 @@ impl Builder { // If this item has more elements remaining in its selector, then // add a state transition based on the next step. 
if let Some(step) = next_step { - transition_map.insert(PropertyTransitionJSON { - kind: step.kind.clone(), - named: step.is_named, - index: step.child_index, - text: step.text_pattern.clone(), - state_id: 0, - }); + transition_map.insert(( + PropertyTransitionJSON { + kind: step.kind.clone(), + named: step.is_named, + index: step.child_index, + text: step.text_pattern.clone(), + state_id: 0, + }, + + // Include the rule id so that it can be used when sorting transitions. + item.rule_id, + )); } // If the item has matched its entire selector, then the item's // properties are applicable to this state. @@ -192,9 +197,8 @@ impl Builder { // destination state. let mut transition_list: Vec<(PropertyTransitionJSON, u32)> = transition_map .into_iter() - .map(|mut transition| { + .map(|(mut transition, rule_id)| { let mut next_item_set = ItemSet::new(); - let mut latest_matching_rule_id = 0; for item in &item_set { let rule = &self.rules[item.rule_id as usize]; let selector = &rule.selectors[item.selector_id as usize]; @@ -205,21 +209,11 @@ impl Builder { // advance the item to the next part of its selector and add the // resulting item to this transition's destination state. if step_matches_transition(step, &transition) { - let next_item = Item { + next_item_set.insert(Item { rule_id: item.rule_id, selector_id: item.selector_id, step_id: item.step_id + 1, - }; - - next_item_set.insert(next_item); - - // If the next item is at the end of its selector, record its rule id - // so that the rule id can be used when sorting this state's transitions. 
- if selector.0.get(item.step_id as usize + 1).is_none() - && item.rule_id > latest_matching_rule_id - { - latest_matching_rule_id = item.rule_id; - } + }); } // If the next step of the item is not an immediate child, then @@ -232,7 +226,7 @@ impl Builder { } transition.state_id = self.add_state(next_item_set); - (transition, latest_matching_rule_id) + (transition, rule_id) }) .collect(); @@ -447,9 +441,6 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { fn generate_property_sheet(path: impl AsRef, css: &str) -> Result { let rules = parse_property_sheet(path.as_ref(), &css)?; - for rule in &rules { - eprintln!("rule {:?}", rule); - } Ok(Builder::new(rules).build()) } From 0d85a1ef53e98cdfb060e02d029b2637e3b1e79a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 09:48:45 -0800 Subject: [PATCH 123/208] Exclude final newlines from inputs when parsing corpus files --- cli/src/test.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/test.rs b/cli/src/test.rs index 790e9ec7..e064dffd 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -16,7 +16,7 @@ lazy_static! 
{ .multi_line(true) .build() .unwrap(); - static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n") + static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"\r?\n---+\r?\n") .multi_line(true) .build() .unwrap(); @@ -251,12 +251,12 @@ d children: vec![ TestEntry::Example { name: "The first test".to_string(), - input: "\na b c\n\n".as_bytes().to_vec(), + input: "\na b c\n".as_bytes().to_vec(), output: "(a (b c))".to_string(), }, TestEntry::Example { name: "The second test".to_string(), - input: "d\n".as_bytes().to_vec(), + input: "d".as_bytes().to_vec(), output: "(d)".to_string(), }, ] From e64f7a64a11ceeb21e5425133a564e5bcf9022f1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 13:30:45 -0800 Subject: [PATCH 124/208] Start work on running test corpus tests --- Cargo.lock | 1 + cli/Cargo.toml | 1 + cli/build.rs | 6 + .../build_tables/build_parse_table.rs | 51 +--- cli/src/generate/build_tables/mod.rs | 41 +++- cli/src/generate/mod.rs | 67 ++++-- cli/src/loader.rs | 226 +++++++++++------- cli/src/main.rs | 13 +- cli/src/tests/corpuses.rs | 76 +++++- cli/src/tests/parser_api.rs | 2 +- .../test_grammars/aliased_rules/grammar.json | 2 +- .../grammar.json | 2 +- .../external_tokens/grammar.json | 2 +- .../inverted_external_token/grammar.json | 2 +- .../precedence_on_subsequence/grammar.json | 4 +- 15 files changed, 328 insertions(+), 168 deletions(-) create mode 100644 cli/build.rs diff --git a/Cargo.lock b/Cargo.lock index fa7712ba..464cd050 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -774,6 +774,7 @@ name = "tree-sitter-cli" version = "0.1.0" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", diff --git 
a/cli/Cargo.toml b/cli/Cargo.toml index 2eabd88f..b6226917 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -9,6 +9,7 @@ name = "tree-sitter" path = "src/main.rs" [dependencies] +cc = "1.0" ansi_term = "0.11" difference = "2.0" lazy_static = "1.2.0" diff --git a/cli/build.rs b/cli/build.rs new file mode 100644 index 00000000..e0ebd1c4 --- /dev/null +++ b/cli/build.rs @@ -0,0 +1,6 @@ +fn main() { + println!( + "cargo:rustc-env=BUILD_TARGET={}", + std::env::var("TARGET").unwrap() + ); +} diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 73c9c0e2..6af85b4c 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -1,7 +1,9 @@ use super::item::{ParseItem, ParseItemSet, TokenSet}; use super::item_set_builder::ParseItemSetBuilder; use crate::error::{Error, Result}; -use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use crate::generate::grammars::{ + InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, +}; use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType}; use crate::generate::tables::{ AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, @@ -11,6 +13,7 @@ use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; use std::collections::hash_map::DefaultHasher; use std::collections::VecDeque; +use std::u32; use std::fmt::Write; use std::hash::Hasher; @@ -94,7 +97,6 @@ impl<'a> ParseTableBuilder<'a> { )?; } - self.populate_used_symbols(); self.remove_precedences(); Ok((self.parse_table, self.following_tokens)) @@ -313,7 +315,10 @@ impl<'a> ParseTableBuilder<'a> { .first_set(&step.symbol) .contains(&conflicting_lookahead) { - conflicting_items.insert(item); + if item.variable_index != u32::MAX { + conflicting_items.insert(item); + } + let precedence = item.precedence(); if let Some(range) = &mut shift_precedence 
{ if precedence < range.start { @@ -327,7 +332,9 @@ impl<'a> ParseTableBuilder<'a> { } } } else if lookaheads.contains(&conflicting_lookahead) { - conflicting_items.insert(item); + if item.variable_index != u32::MAX { + conflicting_items.insert(item); + } } } @@ -610,40 +617,6 @@ impl<'a> ParseTableBuilder<'a> { } } - fn populate_used_symbols(&mut self) { - let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()]; - let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()]; - let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()]; - for state in &self.parse_table.states { - for symbol in state.terminal_entries.keys() { - match symbol.kind { - SymbolType::Terminal => terminal_usages[symbol.index] = true, - SymbolType::External => external_usages[symbol.index] = true, - _ => {} - } - } - for symbol in state.nonterminal_entries.keys() { - non_terminal_usages[symbol.index] = true; - } - } - for (i, value) in external_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::external(i)); - } - } - self.parse_table.symbols.push(Symbol::end()); - for (i, value) in terminal_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::terminal(i)); - } - } - for (i, value) in non_terminal_usages.into_iter().enumerate() { - if value { - self.parse_table.symbols.push(Symbol::non_terminal(i)); - } - } - } - fn remove_precedences(&mut self) { for state in self.parse_table.states.iter_mut() { for (_, entry) in state.terminal_entries.iter_mut() { @@ -702,7 +675,7 @@ impl<'a> ParseTableBuilder<'a> { if variable.kind == VariableType::Named { variable.name.clone() } else { - format!("\"{}\"", &variable.name) + format!("'{}'", &variable.name) } } } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index b8432fe5..52c6abac 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -15,7 +15,7 
@@ use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor}; -use crate::generate::rules::{AliasMap, Symbol}; +use crate::generate::rules::{AliasMap, Symbol, SymbolType}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; pub(crate) fn build_tables( @@ -45,6 +45,7 @@ pub(crate) fn build_tables( &token_conflict_map, &keywords, ); + populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar); mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map); if minimize { minimize_parse_table( @@ -151,6 +152,44 @@ fn populate_error_state( state.terminal_entries.insert(Symbol::end(), recover_entry); } +fn populate_used_symbols( + parse_table: &mut ParseTable, + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) { + let mut terminal_usages = vec![false; lexical_grammar.variables.len()]; + let mut non_terminal_usages = vec![false; syntax_grammar.variables.len()]; + let mut external_usages = vec![false; syntax_grammar.external_tokens.len()]; + for state in &parse_table.states { + for symbol in state.terminal_entries.keys() { + match symbol.kind { + SymbolType::Terminal => terminal_usages[symbol.index] = true, + SymbolType::External => external_usages[symbol.index] = true, + _ => {} + } + } + for symbol in state.nonterminal_entries.keys() { + non_terminal_usages[symbol.index] = true; + } + } + for (i, value) in external_usages.into_iter().enumerate() { + if value { + parse_table.symbols.push(Symbol::external(i)); + } + } + parse_table.symbols.push(Symbol::end()); + for (i, value) in terminal_usages.into_iter().enumerate() { + if value { + parse_table.symbols.push(Symbol::terminal(i)); + } + } + for (i, value) in non_terminal_usages.into_iter().enumerate() { + if value { + parse_table.symbols.push(Symbol::non_terminal(i)); + } + } +} + fn 
identify_keywords( lexical_grammar: &LexicalGrammar, parse_table: &ParseTable, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 0899d793..5d89bbfe 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; use crate::error::Result; +use regex::{Regex, RegexBuilder}; use std::fs; use std::io::Write; use std::path::PathBuf; @@ -18,7 +19,14 @@ mod render; mod rules; mod tables; -pub fn generate_parser_for_grammar( +lazy_static! { + static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*") + .multi_line(true) + .build() + .unwrap(); +} + +pub fn generate_parser_in_directory( repo_path: &PathBuf, minimize: bool, state_ids_to_log: Vec, @@ -26,33 +34,48 @@ pub fn generate_parser_for_grammar( ) -> Result<()> { if !properties_only { let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js")); - let input_grammar = parse_grammar(&grammar_json)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(&input_grammar)?; - let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - &inlines, - minimize, - state_ids_to_log, - )?; - let c_code = render_c_code( - &input_grammar.name, - parse_table, - main_lex_table, - keyword_lex_table, - keyword_capture_token, - syntax_grammar, - lexical_grammar, - simple_aliases, - ); + let c_code = + generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; fs::write(repo_path.join("src").join("parser.c"), c_code)?; } properties::generate_property_sheets(repo_path)?; Ok(()) } +#[cfg(test)] +pub fn generate_parser_for_grammar(grammar_json: &String) -> Result { + let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); + generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) +} + +fn 
generate_parser_for_grammar_with_opts( + grammar_json: &str, + minimize: bool, + state_ids_to_log: Vec, +) -> Result { + let input_grammar = parse_grammar(grammar_json)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(&input_grammar)?; + let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &inlines, + minimize, + state_ids_to_log, + )?; + Ok(render_c_code( + &input_grammar.name, + parse_table, + main_lex_table, + keyword_lex_table, + keyword_capture_token, + syntax_grammar, + lexical_grammar, + simple_aliases, + )) +} + fn load_js_grammar_file(grammar_path: &PathBuf) -> String { let mut node_process = Command::new("node") .stdin(Stdio::piped()) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 7dfb233b..e056bbaa 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -6,6 +6,7 @@ use std::io; use std::mem; use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::SystemTime; use tree_sitter::{Language, PropertySheet}; const PACKAGE_JSON_PATH: &'static str = "package.json"; @@ -37,7 +38,7 @@ pub struct LanguageConfiguration { pub struct Loader { parser_lib_path: PathBuf, language_repos: Vec, - language_configuration_indices_by_file_type: HashMap>, + language_configuration_ids_by_file_type: HashMap>, } unsafe impl Send for Loader {} @@ -48,19 +49,20 @@ impl Loader { Loader { parser_lib_path, language_repos: Vec::new(), - language_configuration_indices_by_file_type: HashMap::new(), + language_configuration_ids_by_file_type: HashMap::new(), } } - pub fn find_parsers(&mut self, parser_src_paths: &Vec) -> io::Result<()> { + pub fn find_all_languages(&mut self, parser_src_paths: &Vec) -> io::Result<()> { for parser_container_dir in parser_src_paths.iter() { for entry in fs::read_dir(parser_container_dir)? 
{ let entry = entry?; if let Some(parser_dir_name) = entry.file_name().to_str() { if parser_dir_name.starts_with("tree-sitter-") { - if self.load_language_configurations( - &parser_container_dir.join(parser_dir_name), - ).is_err() { + if self + .find_language_at_path(&parser_container_dir.join(parser_dir_name)) + .is_err() + { eprintln!("Error loading {}", parser_dir_name); } } @@ -70,90 +72,126 @@ impl Loader { Ok(()) } - pub fn language_configuration_at_path( - &mut self, - path: &Path, - ) -> io::Result> { - let repo_index = self.load_language_configurations(path)?; - self.load_language_from_repo(repo_index, 0) - } - - pub fn language_for_file_name( - &mut self, - path: &Path, - ) -> io::Result> { - let indices = path - .file_name() - .and_then(|n| n.to_str()) - .and_then(|file_name| { - self.language_configuration_indices_by_file_type - .get(file_name) - }) - .or_else(|| { - path.extension() - .and_then(|extension| extension.to_str()) - .and_then(|extension| { - self.language_configuration_indices_by_file_type - .get(extension) - }) - }); - - if let Some(indices) = indices { - // TODO use `content-regex` to pick one - for (repo_index, conf_index) in indices { - return self.load_language_from_repo(*repo_index, *conf_index); - } - } - Ok(None) - } - - fn load_language_from_repo( - &mut self, - repo_index: usize, - conf_index: usize, - ) -> io::Result> { - let repo = &self.language_repos[repo_index]; - let language = if let Some(language) = repo.language { - language - } else { - let language = self.load_language_at_path(&repo.name, &repo.path)?; - self.language_repos[repo_index].language = Some(language); - language - }; - if let Some(configuration) = self.language_repos[repo_index] - .configurations - .get(conf_index) - { - Ok(Some((language, configuration))) + pub fn language_at_path(&mut self, path: &Path) -> io::Result> { + if let Ok(id) = self.find_language_at_path(path) { + Ok(Some(self.language_configuration_for_id(id)?.0)) } else { Ok(None) } } + pub fn 
language_configuration_for_file_name( + &mut self, + path: &Path, + ) -> io::Result> { + let ids = path + .file_name() + .and_then(|n| n.to_str()) + .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name)) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| { + self.language_configuration_ids_by_file_type.get(extension) + }) + }); + if let Some(ids) = ids { + // TODO use `content-regex` to pick one + for (repo_id, configuration_id) in ids.iter().cloned() { + let (language, configurations) = self.language_configuration_for_id(repo_id)?; + return Ok(Some((language, &configurations[configuration_id]))); + } + } + Ok(None) + } + + fn language_configuration_for_id( + &mut self, + id: usize, + ) -> io::Result<(Language, &Vec)> { + let repo = &self.language_repos[id]; + let language = if let Some(language) = repo.language { + language + } else { + let language = self.load_language_at_path(&repo.name, &repo.path)?; + self.language_repos[id].language = Some(language); + language + }; + Ok((language, &self.language_repos[id].configurations)) + } + fn load_language_at_path(&self, name: &str, language_path: &Path) -> io::Result { - let parser_c_path = language_path.join(PARSER_C_PATH); + let src_path = language_path.join("src"); + let parser_c_path = src_path.join("parser.c"); + + let scanner_path; + let scanner_c_path = src_path.join("scanner.c"); + if scanner_c_path.exists() { + scanner_path = Some(scanner_c_path); + } else { + let scanner_cc_path = src_path.join("scanner.cc"); + if scanner_cc_path.exists() { + scanner_path = Some(scanner_cc_path); + } else { + scanner_path = None; + } + } + + self.load_language_from_sources(name, &src_path, &parser_c_path, &scanner_path) + } + + pub fn load_language_from_sources( + &self, + name: &str, + header_path: &Path, + parser_path: &Path, + scanner_path: &Option, + ) -> io::Result { let mut library_path = self.parser_lib_path.join(name); 
library_path.set_extension(DYLIB_EXTENSION); - if !library_path.exists() || was_modified_more_recently(&parser_c_path, &library_path)? { - let compiler_name = std::env::var("CXX").unwrap_or("c++".to_owned()); - let mut command = Command::new(compiler_name); - command - .arg("-shared") - .arg("-fPIC") - .arg("-I") - .arg(language_path.join("src")) - .arg("-o") - .arg(&library_path) - .arg("-xc") - .arg(parser_c_path); - let scanner_c_path = language_path.join(SCANNER_C_PATH); - let scanner_cc_path = language_path.join(SCANNER_CC_PATH); - if scanner_c_path.exists() { - command.arg("-xc").arg(scanner_c_path); - } else if scanner_cc_path.exists() { - command.arg("-xc++").arg(scanner_cc_path); + if needs_recompile(&library_path, &parser_path, &scanner_path)? { + let mut config = cc::Build::new(); + config + .opt_level(2) + .cargo_metadata(false) + .target(env!("BUILD_TARGET")) + .host(env!("BUILD_TARGET")); + let compiler = config.get_compiler(); + let compiler_path = compiler.path(); + let mut command = Command::new(compiler_path); + + if cfg!(windows) { + command + .args(&["/nologo", "/LD", "/I"]) + .arg(header_path) + .arg("/Od") + .arg(parser_path); + if let Some(scanner_path) = scanner_path.as_ref() { + command.arg(scanner_path); + } + command + .arg("/link") + .arg(format!("/out:{}", library_path.to_str().unwrap())); + } else { + command + .arg("-shared") + .arg("-fPIC") + .arg("-I") + .arg(header_path) + .arg("-o") + .arg(&library_path) + .arg("-xc") + .arg(parser_path); + if let Some(scanner_path) = scanner_path.as_ref() { + if scanner_path.extension() == Some("c".as_ref()) { + command.arg(scanner_path); + } else { + command.arg("-xc++").arg(scanner_path); + } + } } + command.output()?; } @@ -168,7 +206,7 @@ impl Loader { Ok(language) } - fn load_language_configurations<'a>(&'a mut self, parser_path: &Path) -> io::Result { + fn find_language_at_path<'a>(&'a mut self, parser_path: &Path) -> io::Result { let name = parser_path .file_name() .unwrap() @@ -218,7 
+256,7 @@ impl Loader { for (i, configuration) in configurations.iter().enumerate() { for file_type in &configuration.file_types { - self.language_configuration_indices_by_file_type + self.language_configuration_ids_by_file_type .entry(file_type.to_string()) .or_insert(Vec::new()) .push((self.language_repos.len(), i)); @@ -236,6 +274,26 @@ impl Loader { } } -fn was_modified_more_recently(a: &Path, b: &Path) -> io::Result { - Ok(fs::metadata(a)?.modified()? > fs::metadata(b)?.modified()?) +fn needs_recompile( + lib_path: &Path, + parser_c_path: &Path, + scanner_path: &Option, +) -> io::Result { + if !lib_path.exists() { + return Ok(true); + } + let lib_mtime = mtime(lib_path)?; + if mtime(parser_c_path)? > lib_mtime { + return Ok(true); + } + if let Some(scanner_path) = scanner_path { + if mtime(scanner_path)? > lib_mtime { + return Ok(true); + } + } + Ok(false) +} + +fn mtime(path: &Path) -> io::Result { + Ok(fs::metadata(path)?.modified()?) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 9f095668..dda4bdca 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -89,13 +89,18 @@ fn run() -> error::Result<()> { ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) .collect() }); - generate::generate_parser_for_grammar(¤t_dir, minimize, state_ids_to_log, properties_only)?; + generate::generate_parser_in_directory( + ¤t_dir, + minimize, + state_ids_to_log, + properties_only, + )?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); let filter = matches.value_of("filter"); let corpus_path = current_dir.join("corpus"); - if let Some((language, _)) = loader.language_configuration_at_path(¤t_dir)? { + if let Some(language) = loader.language_at_path(¤t_dir)? 
{ test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)?; } else { eprintln!("No language found"); @@ -103,9 +108,9 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("parse") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); - loader.find_parsers(&vec![home_dir.join("github")])?; + loader.find_all_languages(&vec![home_dir.join("github")])?; let source_path = Path::new(matches.value_of("path").unwrap()); - if let Some((language, _)) = loader.language_for_file_name(source_path)? { + if let Some((language, _)) = loader.language_configuration_for_file_name(source_path)? { parse::parse_file_at_path(language, source_path, debug, debug_graph)?; } else { eprintln!("No language found"); diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index b70bb371..624786fc 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -1,6 +1,9 @@ use super::languages; +use crate::generate; +use crate::loader::Loader; use crate::test::{parse_tests, TestEntry}; -use std::path::PathBuf; +use std::fs; +use std::path::{Path, PathBuf}; use tree_sitter::{Language, Parser}; lazy_static! { @@ -12,20 +15,16 @@ lazy_static! 
{ ("html", languages::html()), ("javascript", languages::javascript()), ]; + static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect(); + static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); + static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch"); + static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); } #[test] -fn test_corpus_files() { +fn test_real_language_corpus_files() { let mut parser = Parser::new(); - let grammars_dir: PathBuf = [ - env!("CARGO_MANIFEST_DIR"), - "..", - "test", - "fixtures", - "grammars", - ] - .iter() - .collect(); + let grammars_dir = FIXTURES_DIR.join("grammars"); for (name, language) in LANGUAGES.iter().cloned() { let corpus_dir = grammars_dir.join(name).join("corpus"); @@ -35,6 +34,61 @@ fn test_corpus_files() { } } +#[test] +fn test_feature_corpus_files() { + fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap(); + + let mut loader = Loader::new(SCRATCH_DIR.clone()); + let mut parser = Parser::new(); + let test_grammars_dir = FIXTURES_DIR.join("test_grammars"); + + for entry in fs::read_dir(&test_grammars_dir).unwrap() { + let entry = entry.unwrap(); + let test_name = entry.file_name(); + let test_name = test_name.to_str().unwrap(); + + eprintln!("test name: {}", test_name); + let test_path = entry.path(); + let grammar_path = test_path.join("grammar.json"); + let corpus_path = test_path.join("corpus.txt"); + let error_message_path = test_path.join("expected_error.txt"); + + let grammar_json = fs::read_to_string(grammar_path).unwrap(); + let generate_result = generate::generate_parser_for_grammar(&grammar_json); + if error_message_path.exists() { + continue; + if let Err(e) = generate_result { + assert_eq!(e.0, fs::read_to_string(&error_message_path).unwrap()); + } else { + panic!( + "Expected error message but got none for test grammar '{}'", + test_name + ); + } + } else { + let c_code = generate_result.unwrap(); + let parser_c_path = 
SCRATCH_DIR.join(&format!("{}-parser.c", test_name)); + fs::write(&parser_c_path, c_code).unwrap(); + let scanner_path = test_path.join("scanner.c"); + let scanner_path = if scanner_path.exists() { + Some(scanner_path) + } else { + None + }; + let language = loader + .load_language_from_sources(test_name, &HEADER_DIR, &parser_c_path, &scanner_path) + .unwrap(); + } + } + + // for (name, language) in LANGUAGES.iter().cloned() { + // let corpus_dir = grammars_dir.join(name).join("corpus"); + // let test = parse_tests(&corpus_dir).unwrap(); + // parser.set_language(language).unwrap(); + // run_mutation_tests(&mut parser, test); + // } +} + fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { match test { TestEntry::Example { diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api.rs index af5ba71f..e32c292b 100644 --- a/cli/src/tests/parser_api.rs +++ b/cli/src/tests/parser_api.rs @@ -1,6 +1,6 @@ use super::languages::rust; use std::thread; -use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet, Range}; +use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet}; #[test] fn test_basic_parsing() { diff --git a/test/fixtures/test_grammars/aliased_rules/grammar.json b/test/fixtures/test_grammars/aliased_rules/grammar.json index 391f780f..a66bfb78 100644 --- a/test/fixtures/test_grammars/aliased_rules/grammar.json +++ b/test/fixtures/test_grammars/aliased_rules/grammar.json @@ -40,7 +40,7 @@ {"type": "SYMBOL", "name": "_expression"}, {"type": "STRING", "value": "("}, {"type": "SYMBOL", "name": "_expression"}, - {"type": "STRING", "value": ")"}, + {"type": "STRING", "value": ")"} ] } }, diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/grammar.json b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/grammar.json index 0be2008c..d97d9c9d 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/grammar.json +++ 
b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/grammar.json @@ -2,7 +2,7 @@ "name": "conflict_in_repeat_rule_after_external_token", "externals": [ - {"type": "SYMBOL", "name": "_program_start"}, + {"type": "SYMBOL", "name": "_program_start"} ], "rules": { diff --git a/test/fixtures/test_grammars/external_tokens/grammar.json b/test/fixtures/test_grammars/external_tokens/grammar.json index d61a978f..e5ca6bc9 100644 --- a/test/fixtures/test_grammars/external_tokens/grammar.json +++ b/test/fixtures/test_grammars/external_tokens/grammar.json @@ -45,7 +45,7 @@ {"type": "SYMBOL", "name": "expression"}, {"type": "SYMBOL", "name": "_percent_string_end"} ] - }, + } ] }, diff --git a/test/fixtures/test_grammars/inverted_external_token/grammar.json b/test/fixtures/test_grammars/inverted_external_token/grammar.json index a43cedcc..6dee3d03 100644 --- a/test/fixtures/test_grammars/inverted_external_token/grammar.json +++ b/test/fixtures/test_grammars/inverted_external_token/grammar.json @@ -15,7 +15,7 @@ "content": { "type": "SYMBOL", "name": "statement" - }, + } }, "statement": { diff --git a/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json b/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json index d05db765..d992793c 100644 --- a/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json +++ b/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json @@ -110,7 +110,7 @@ "type": "SEQ", "members": [ {"type": "STRING", "value": "::"}, - {"type": "SYMBOL", "name": "expression"}, + {"type": "SYMBOL", "name": "expression"} ] } ] @@ -132,4 +132,4 @@ "value": "[a-zA-Z]+" } } -} \ No newline at end of file +} From 45c8cf47ea3f6a53ca3fe31283b399062697e0f7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 13:49:49 -0800 Subject: [PATCH 125/208] Enable backtraces on CI --- .appveyor.yml | 1 + .travis.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.appveyor.yml b/.appveyor.yml index 
1d9fb179..f1acceda 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,4 +1,5 @@ environment: + RUST_BACKTRACE: full TREE_SITTER_TEST: true build: false diff --git a/.travis.yml b/.travis.yml index 47b88e81..46bb9a95 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ rust: env: - TREE_SITTER_TEST=1 + - RUST_BACKTRACE=full before_install: - ./script/fetch-fixtures From fa283dcf27b897891e3203a527a6263dde198553 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 14:44:32 -0800 Subject: [PATCH 126/208] Use the compiler environment vars computed by the cc config --- cli/src/loader.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index e056bbaa..af1ab7be 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -9,11 +9,6 @@ use std::process::Command; use std::time::SystemTime; use tree_sitter::{Language, PropertySheet}; -const PACKAGE_JSON_PATH: &'static str = "package.json"; -const PARSER_C_PATH: &'static str = "src/parser.c"; -const SCANNER_C_PATH: &'static str = "src/scanner.c"; -const SCANNER_CC_PATH: &'static str = "src/scanner.cc"; - #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -158,8 +153,10 @@ impl Loader { .target(env!("BUILD_TARGET")) .host(env!("BUILD_TARGET")); let compiler = config.get_compiler(); - let compiler_path = compiler.path(); - let mut command = Command::new(compiler_path); + let mut command = Command::new(compiler.path()); + for (key, value) in compiler.env() { + command.env(key, value); + } if cfg!(windows) { command @@ -192,7 +189,17 @@ impl Loader { } } - command.output()?; + let output = command.output()?; + if !output.status.success() { + return Err(io::Error::new( + io::ErrorKind::Other, + format!( + "Parser compilation failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ).as_str(), + )); + } } let library = Library::new(library_path)?; @@ -233,7 
+240,7 @@ impl Loader { tree_sitter: Option>, } - let package_json_contents = fs::read_to_string(&parser_path.join(PACKAGE_JSON_PATH))?; + let package_json_contents = fs::read_to_string(&parser_path.join("package.json"))?; let package_json: PackageJSON = serde_json::from_str(&package_json_contents)?; let configurations = package_json .tree_sitter From 88f1c4af8edb82d1893a9bc7ab478dc05cc4ec7b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 14:48:29 -0800 Subject: [PATCH 127/208] Ensure the .tree-sitter directory exists --- cli/src/main.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index dda4bdca..5a830458 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -23,6 +23,7 @@ mod tests; use self::loader::Loader; use clap::{App, Arg, SubCommand}; use std::env; +use std::fs; use std::path::Path; use std::process::exit; use std::usize; @@ -74,7 +75,9 @@ fn run() -> error::Result<()> { let home_dir = dirs::home_dir().unwrap(); let current_dir = env::current_dir().unwrap(); - let mut loader = Loader::new(home_dir.join(".tree-sitter")); + let config_dir = home_dir.join(".tree-sitter"); + fs::create_dir_all(&config_dir).unwrap(); + let mut loader = Loader::new(config_dir); if let Some(matches) = matches.subcommand_matches("generate") { if matches.is_present("log") { From acfa0c524a7eb7ca417eb75375fbd7653d6d2c03 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 14:48:36 -0800 Subject: [PATCH 128/208] Fix env var setup on travis --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 46bb9a95..0d4f8cd5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,7 @@ rust: - stable env: - - TREE_SITTER_TEST=1 - - RUST_BACKTRACE=full + - TREE_SITTER_TEST=1 RUST_BACKTRACE=full before_install: - ./script/fetch-fixtures From 0236de79635740534cb5f01238a23525965a01d2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 
14:54:30 -0800 Subject: [PATCH 129/208] Tweak caching setup on appveyor --- .appveyor.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index f1acceda..147827e9 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,14 +5,15 @@ environment: build: false install: - - git submodule update --init --recursive - + # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - rustup-init -yv --default-toolchain stable - - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - rustc -vV - cargo -vV + # Install dependencies + - git submodule update --init - script\fetch-fixtures.cmd test_script: @@ -27,3 +28,5 @@ cache: - target - test\fixtures\grammars - C:\Users\appveyor\.cargo + - C:\cargo\registry + - C:\cargo\git From 6592fdd24cb63fd1967fcf5f51fc8b947b96284c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 17:26:45 -0800 Subject: [PATCH 130/208] Fix parser generation error messages --- .../build_tables/build_parse_table.rs | 113 +++++++----- .../build_tables/coincident_tokens.rs | 12 +- cli/src/generate/build_tables/item.rs | 4 +- cli/src/generate/build_tables/mod.rs | 12 +- .../prepare_grammar/flatten_grammar.rs | 170 +++++++++++------- .../prepare_grammar/intern_symbols.rs | 2 +- cli/src/generate/render.rs | 12 +- cli/src/tests/corpuses.rs | 37 +++- .../expected_error.txt | 12 +- .../expected_error.txt | 12 +- .../conflicting_precedence/expected_error.txt | 8 +- .../dynamic_precedence/grammar.json | 2 +- .../inlined_aliased_rules/grammar.json | 2 +- .../expected_error.txt | 10 +- .../expected_error.txt | 6 +- .../precedence_on_token/grammar.json | 2 +- 16 files changed, 252 insertions(+), 164 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 6af85b4c..b87cc3d0 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ 
b/cli/src/generate/build_tables/build_parse_table.rs @@ -461,18 +461,20 @@ impl<'a> ParseTableBuilder<'a> { ) .unwrap(); write!(&mut msg, "Possible interpretations:\n\n").unwrap(); - for (i, item) in conflicting_items.iter().enumerate() { - write!(&mut msg, " {}:", i + 1).unwrap(); + + let interpretions = conflicting_items.iter().enumerate().map(|(i, item)| { + let mut line = String::new(); + write!(&mut line, " {}:", i + 1).unwrap(); for preceding_symbol in preceding_symbols .iter() .take(preceding_symbols.len() - item.step_index as usize) { - write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap(); + write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap(); } write!( - &mut msg, + &mut line, " ({}", &self.syntax_grammar.variables[item.variable_index as usize].name ) @@ -480,17 +482,17 @@ impl<'a> ParseTableBuilder<'a> { for (j, step) in item.production.steps.iter().enumerate() { if j as u32 == item.step_index { - write!(&mut msg, " •").unwrap(); + write!(&mut line, " •").unwrap(); } - write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap(); + write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap(); } - write!(&mut msg, ")").unwrap(); + write!(&mut line, ")").unwrap(); if item.is_done() { write!( - &mut msg, - " • {}", + &mut line, + " • {} …", self.symbol_name(&conflicting_lookahead) ) .unwrap(); @@ -498,16 +500,33 @@ impl<'a> ParseTableBuilder<'a> { let precedence = item.precedence(); let associativity = item.associativity(); - if precedence != 0 || associativity.is_some() { - write!( - &mut msg, + + let prec_line = if let Some(associativity) = associativity { + Some(format!( "(precedence: {}, associativity: {:?})", precedence, associativity - ) - .unwrap(); - } + )) + } else if precedence > 0 { + Some(format!("(precedence: {})", precedence)) + } else { + None + }; - write!(&mut msg, "\n").unwrap(); + (line, prec_line) + }).collect::>(); + + let max_interpretation_length = interpretions.iter().map(|i| 
i.0.chars().count()).max().unwrap(); + + for (line, prec_suffix) in interpretions { + msg += &line; + if let Some(prec_suffix) = prec_suffix { + for _ in line.chars().count()..max_interpretation_length { + msg.push(' '); + } + msg += " "; + msg += &prec_suffix; + } + msg.push('\n'); } let mut resolution_count = 0; @@ -517,26 +536,41 @@ impl<'a> ParseTableBuilder<'a> { .filter(|i| !i.is_done()) .cloned() .collect::>(); - if shift_items.len() > 0 { - resolution_count += 1; - write!( - &mut msg, - " {}: Specify a higher precedence in", - resolution_count - ) - .unwrap(); - for (i, item) in shift_items.iter().enumerate() { - if i > 0 { - write!(&mut msg, " and").unwrap(); - } + if actual_conflict.len() > 1 { + if shift_items.len() > 0 { + resolution_count += 1; write!( &mut msg, - " `{}`", - self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + " {}: Specify a higher precedence in", + resolution_count ) .unwrap(); + for (i, item) in shift_items.iter().enumerate() { + if i > 0 { + write!(&mut msg, " and").unwrap(); + } + write!( + &mut msg, + " `{}`", + self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + ) + .unwrap(); + } + write!(&mut msg, " than in the other rules.\n").unwrap(); + } + + for item in &conflicting_items { + if item.is_done() { + resolution_count += 1; + write!( + &mut msg, + " {}: Specify a higher precedence in `{}` than in the other rules.\n", + resolution_count, + self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + ) + .unwrap(); + } } - write!(&mut msg, " than in the other rules.\n").unwrap(); } if considered_associativity { @@ -553,7 +587,7 @@ impl<'a> ParseTableBuilder<'a> { } write!( &mut msg, - "{}", + "`{}`", self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) ) .unwrap(); @@ -561,19 +595,6 @@ impl<'a> ParseTableBuilder<'a> { write!(&mut msg, "\n").unwrap(); } - for item in &conflicting_items { - if item.is_done() { - resolution_count += 1; - write!( - &mut msg, - " {}: 
Specify a higher precedence in `{}` than in the other rules.\n", - resolution_count, - self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) - ) - .unwrap(); - } - } - resolution_count += 1; write!( &mut msg, @@ -585,7 +606,7 @@ impl<'a> ParseTableBuilder<'a> { if i > 0 { write!(&mut msg, ", ").unwrap(); } - write!(&mut msg, "{}", self.symbol_name(symbol)).unwrap(); + write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap(); } write!(&mut msg, "\n").unwrap(); diff --git a/cli/src/generate/build_tables/coincident_tokens.rs b/cli/src/generate/build_tables/coincident_tokens.rs index 25dbc331..bb234c4a 100644 --- a/cli/src/generate/build_tables/coincident_tokens.rs +++ b/cli/src/generate/build_tables/coincident_tokens.rs @@ -19,10 +19,14 @@ impl<'a> CoincidentTokenIndex<'a> { }; for (i, state) in table.states.iter().enumerate() { for symbol in state.terminal_entries.keys() { - for other_symbol in state.terminal_entries.keys() { - let index = result.index(symbol.index, other_symbol.index); - if result.entries[index].last().cloned() != Some(i) { - result.entries[index].push(i); + if symbol.is_terminal() { + for other_symbol in state.terminal_entries.keys() { + if other_symbol.is_terminal() { + let index = result.index(symbol.index, other_symbol.index); + if result.entries[index].last().cloned() != Some(i) { + result.entries[index].push(i); + } + } } } } diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 81c86f4a..279c5df6 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -402,11 +402,11 @@ impl<'a> PartialEq for ParseItem<'a> { impl<'a> Ord for ParseItem<'a> { fn cmp(&self, other: &Self) -> Ordering { - let o = self.variable_index.cmp(&other.variable_index); + let o = self.step_index.cmp(&other.step_index); if o != Ordering::Equal { return o; } - let o = self.step_index.cmp(&other.step_index); + let o = self.variable_index.cmp(&other.variable_index); if o != 
Ordering::Equal { return o; } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 52c6abac..28b18109 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -312,11 +312,13 @@ fn mark_fragile_tokens( } } for (token, entry) in state.terminal_entries.iter_mut() { - for i in 0..n { - if token_conflict_map.does_overlap(i, token.index) { - if valid_tokens_mask[i] { - entry.reusable = false; - break; + if token.is_terminal() { + for i in 0..n { + if token_conflict_map.does_overlap(i, token.index) { + if valid_tokens_mask[i] { + entry.reusable = false; + break; + } } } } diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 9409a010..abd06769 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -1,6 +1,9 @@ use super::ExtractedSyntaxGrammar; -use crate::error::Result; -use crate::generate::grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable}; +use crate::error::{Error, Result}; +use crate::generate::rules::Symbol; +use crate::generate::grammars::{ + Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable, +}; use crate::generate::rules::{Alias, Associativity, Rule}; struct RuleFlattener { @@ -145,11 +148,38 @@ fn flatten_variable(variable: Variable) -> Result { }) } +fn symbol_is_used(variables: &Vec, symbol: Symbol) -> bool { + for variable in variables { + for production in &variable.productions { + for step in &production.steps { + if step.symbol == symbol { + return true; + } + } + } + } + false +} + pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result { let mut variables = Vec::new(); for variable in grammar.variables { variables.push(flatten_variable(variable)?); } + for (i, variable) in variables.iter().enumerate() { + for production in &variable.productions { + if 
production.steps.is_empty() && symbol_is_used(&variables, Symbol::non_terminal(i)) { + return Err(Error(format!( + "The rule `{}` matches the empty string. + +Tree-sitter does not support syntactic rules that match the empty string +unless they are used only as the grammar's start rule. +", + variable.name + ))); + } + } + } Ok(SyntaxGrammar { extra_tokens: grammar.extra_tokens, expected_conflicts: grammar.expected_conflicts, @@ -228,48 +258,55 @@ mod tests { #[test] fn test_flatten_grammar_with_maximum_dynamic_precedence() { let result = flatten_variable(Variable { - name: "test".to_string(), - kind: VariableType::Named, - rule: Rule::seq(vec![ - Rule::non_terminal(1), - Rule::prec_dynamic(101, Rule::seq(vec![ - Rule::non_terminal(2), - Rule::choice(vec![ - Rule::prec_dynamic(102, Rule::seq(vec![ - Rule::non_terminal(3), - Rule::non_terminal(4) - ])), - Rule::non_terminal(5), - ]), - Rule::non_terminal(6), - ])), - Rule::non_terminal(7), - ]) - }).unwrap(); + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::non_terminal(1), + Rule::prec_dynamic( + 101, + Rule::seq(vec![ + Rule::non_terminal(2), + Rule::choice(vec![ + Rule::prec_dynamic( + 102, + Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]), + ), + Rule::non_terminal(5), + ]), + Rule::non_terminal(6), + ]), + ), + Rule::non_terminal(7), + ]), + }) + .unwrap(); - assert_eq!(result.productions, vec![ - Production { - dynamic_precedence: 102, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::non_terminal(2)), - ProductionStep::new(Symbol::non_terminal(3)), - ProductionStep::new(Symbol::non_terminal(4)), - ProductionStep::new(Symbol::non_terminal(6)), - ProductionStep::new(Symbol::non_terminal(7)), - ], - }, - Production { - dynamic_precedence: 101, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::non_terminal(2)), - ProductionStep::new(Symbol::non_terminal(5)), - 
ProductionStep::new(Symbol::non_terminal(6)), - ProductionStep::new(Symbol::non_terminal(7)), - ], - }, - ]); + assert_eq!( + result.productions, + vec![ + Production { + dynamic_precedence: 102, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::non_terminal(3)), + ProductionStep::new(Symbol::non_terminal(4)), + ProductionStep::new(Symbol::non_terminal(6)), + ProductionStep::new(Symbol::non_terminal(7)), + ], + }, + Production { + dynamic_precedence: 101, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(2)), + ProductionStep::new(Symbol::non_terminal(5)), + ProductionStep::new(Symbol::non_terminal(6)), + ProductionStep::new(Symbol::non_terminal(7)), + ], + }, + ] + ); } #[test] @@ -277,37 +314,40 @@ mod tests { let result = flatten_variable(Variable { name: "test".to_string(), kind: VariableType::Named, - rule: Rule::prec_left(101, Rule::seq(vec![ - Rule::non_terminal(1), - Rule::non_terminal(2), - ])), - }).unwrap(); + rule: Rule::prec_left( + 101, + Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), + ), + }) + .unwrap(); - assert_eq!(result.productions, vec![ - Production { + assert_eq!( + result.productions, + vec![Production { dynamic_precedence: 0, steps: vec![ - ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)), - ProductionStep::new(Symbol::non_terminal(2)).with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_prec(101, Some(Associativity::Left)), + ProductionStep::new(Symbol::non_terminal(2)) + .with_prec(101, Some(Associativity::Left)), ] - } - ]); + }] + ); let result = flatten_variable(Variable { name: "test".to_string(), kind: VariableType::Named, - rule: Rule::prec_left(101, Rule::seq(vec![ - Rule::non_terminal(1), - ])), - }).unwrap(); + rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])), + }) + 
.unwrap(); - assert_eq!(result.productions, vec![ - Production { + assert_eq!( + result.productions, + vec![Production { dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::non_terminal(1)).with_prec(101, Some(Associativity::Left)), - ] - } - ]); + steps: vec![ProductionStep::new(Symbol::non_terminal(1)) + .with_prec(101, Some(Associativity::Left)),] + }] + ); } } diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index a7248817..8b07309b 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -8,7 +8,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden { return Err(Error( - "Grammar's start rule must be visible".to_string(), + "A grammar's start rule must be visible.".to_string(), )); } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 5e0d2b67..55bfbfa2 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -191,13 +191,11 @@ impl Generator { "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len() ); - if self.parse_table.max_aliased_production_length > 0 { - add_line!( - self, - "#define MAX_ALIAS_SEQUENCE_LENGTH {}", - self.parse_table.max_aliased_production_length - ); - } + add_line!( + self, + "#define MAX_ALIAS_SEQUENCE_LENGTH {}", + self.parse_table.max_aliased_production_length + ); add_line!(self, ""); } diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index 624786fc..eeea113c 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -3,7 +3,7 @@ use crate::generate; use crate::loader::Loader; use crate::test::{parse_tests, TestEntry}; use std::fs; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use tree_sitter::{Language, Parser}; lazy_static! { @@ -19,6 +19,7 @@ lazy_static! 
{ static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch"); static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); + static ref EXEC_PATH: PathBuf = std::env::current_exe().unwrap(); } #[test] @@ -38,27 +39,42 @@ fn test_real_language_corpus_files() { fn test_feature_corpus_files() { fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap(); + let filter = std::env::var("TREE_SITTER_TEST_FILTER").ok(); let mut loader = Loader::new(SCRATCH_DIR.clone()); let mut parser = Parser::new(); let test_grammars_dir = FIXTURES_DIR.join("test_grammars"); for entry in fs::read_dir(&test_grammars_dir).unwrap() { let entry = entry.unwrap(); + if !entry.metadata().unwrap().is_dir() { + continue; + } let test_name = entry.file_name(); let test_name = test_name.to_str().unwrap(); - eprintln!("test name: {}", test_name); + if let Some(filter) = filter.as_ref() { + if !test_name.contains(filter.as_str()) { + continue; + } + } + + eprintln!("test: {:?}", test_name); + let test_path = entry.path(); let grammar_path = test_path.join("grammar.json"); - let corpus_path = test_path.join("corpus.txt"); let error_message_path = test_path.join("expected_error.txt"); - let grammar_json = fs::read_to_string(grammar_path).unwrap(); let generate_result = generate::generate_parser_for_grammar(&grammar_json); + if error_message_path.exists() { - continue; + let expected_message = fs::read_to_string(&error_message_path).unwrap(); if let Err(e) = generate_result { - assert_eq!(e.0, fs::read_to_string(&error_message_path).unwrap()); + if e.0 != expected_message { + panic!( + "Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n", + expected_message, e.0 + ); + } } else { panic!( "Expected error message but got none for test grammar '{}'", @@ -66,9 +82,15 @@ fn test_feature_corpus_files() { ); } } else { + let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap(); let 
parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", test_name)); - fs::write(&parser_c_path, c_code).unwrap(); + if !fs::read_to_string(&parser_c_path) + .map(|content| content == c_code) + .unwrap_or(false) + { + fs::write(&parser_c_path, c_code).unwrap(); + } let scanner_path = test_path.join("scanner.c"); let scanner_path = if scanner_path.exists() { Some(scanner_path) @@ -78,6 +100,7 @@ fn test_feature_corpus_files() { let language = loader .load_language_from_sources(test_name, &HEADER_DIR, &parser_c_path, &scanner_path) .unwrap(); + let test = parse_tests(&corpus_path).unwrap(); } } diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt index 2c710346..94d1caa4 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt @@ -1,14 +1,14 @@ Unresolved conflict for symbol sequence: - '[' identifier • ']' … + '[' identifier • identifier … Possible interpretations: - 1: '[' (array_repeat1 identifier) • ']' … - 2: '[' (array_type_repeat1 identifier) • ']' … + 1: '[' (array_type_repeat1 identifier) • identifier … + 2: '[' (array_repeat1 identifier) • identifier … Possible resolutions: - 1: Specify a higher precedence in `array_repeat1` than in the other rules. - 2: Specify a higher precedence in `array_type_repeat1` than in the other rules. - 3: Add a conflict for these rules: `array` `array_type` + 1: Specify a higher precedence in `array_type_repeat1` than in the other rules. + 2: Specify a higher precedence in `array_repeat1` than in the other rules. 
+ 3: Add a conflict for these rules: `array`, `array_type` diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt index cbb3e02c..4a81f0ef 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt @@ -1,14 +1,14 @@ Unresolved conflict for symbol sequence: - _program_start '[' identifier • ']' … + _program_start '[' identifier • identifier … Possible interpretations: - 1: _program_start '[' (array_repeat1 identifier) • ']' … - 2: _program_start '[' (array_type_repeat1 identifier) • ']' … + 1: _program_start '[' (array_type_repeat1 identifier) • identifier … + 2: _program_start '[' (array_repeat1 identifier) • identifier … Possible resolutions: - 1: Specify a higher precedence in `array_repeat1` than in the other rules. - 2: Specify a higher precedence in `array_type_repeat1` than in the other rules. - 3: Add a conflict for these rules: `array` `array_type` + 1: Specify a higher precedence in `array_type_repeat1` than in the other rules. + 2: Specify a higher precedence in `array_repeat1` than in the other rules. 
+ 3: Add a conflict for these rules: `array`, `array_type` diff --git a/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt b/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt index ce7090a3..ea23b072 100644 --- a/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt +++ b/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt @@ -4,12 +4,12 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: expression '+' (product expression • '*' expression) - 2: expression '+' (other_thing expression • '*' '*') - 3: (sum expression '+' expression) • '*' … + 1: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left) + 2: expression '+' (other_thing expression • '*' '*') (precedence: -1, associativity: Left) + 3: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left) Possible resolutions: 1: Specify a higher precedence in `product` and `other_thing` than in the other rules. 2: Specify a higher precedence in `sum` than in the other rules. 
- 3: Add a conflict for these rules: `sum` `product` `other_thing` + 3: Add a conflict for these rules: `sum`, `product`, `other_thing` diff --git a/test/fixtures/test_grammars/dynamic_precedence/grammar.json b/test/fixtures/test_grammars/dynamic_precedence/grammar.json index 381ed4c2..1a7e04ab 100644 --- a/test/fixtures/test_grammars/dynamic_precedence/grammar.json +++ b/test/fixtures/test_grammars/dynamic_precedence/grammar.json @@ -14,7 +14,7 @@ "type": "CHOICE", "members": [ {"type": "SYMBOL", "name": "declaration"}, - {"type": "SYMBOL", "name": "expression"}, + {"type": "SYMBOL", "name": "expression"} ] }, diff --git a/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json b/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json index bdf01789..d98f6e6c 100644 --- a/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json +++ b/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json @@ -44,7 +44,7 @@ {"type": "SYMBOL", "name": "expression"}, {"type": "STRING", "value": "("}, {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ")"}, + {"type": "STRING", "value": ")"} ] } }, diff --git a/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt b/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt index 201bdf98..a8699897 100644 --- a/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt +++ b/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt @@ -4,11 +4,11 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: (unary_a '!' expression) • '<' … - 2: (unary_b '!' expression) • '<' … + 1: (unary_b '!' expression) • '<' … (precedence: 2) + 2: (unary_a '!' expression) • '<' … (precedence: 2) Possible resolutions: - 1: Specify a higher precedence in `unary_a` than in the other rules. - 2: Specify a higher precedence in `unary_b` than in the other rules. 
- 3: Add a conflict for these rules: `unary_a` `unary_b` + 1: Specify a higher precedence in `unary_b` than in the other rules. + 2: Specify a higher precedence in `unary_a` than in the other rules. + 3: Add a conflict for these rules: `unary_a`, `unary_b` diff --git a/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt b/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt index 6ee80f23..557f1837 100644 --- a/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt +++ b/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt @@ -4,12 +4,12 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: identifier (expression identifier) • '{' … - 2: identifier (function_call identifier • block) + 1: identifier (function_call identifier • block) (precedence: 0, associativity: Right) + 2: identifier (expression identifier) • '{' … Possible resolutions: 1: Specify a higher precedence in `function_call` than in the other rules. 2: Specify a higher precedence in `expression` than in the other rules. 
3: Specify a left or right associativity in `expression` - 4: Add a conflict for these rules: `expression` `function_call` + 4: Add a conflict for these rules: `expression`, `function_call` diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.json b/test/fixtures/test_grammars/precedence_on_token/grammar.json index 1b1ef7ea..8ba7e69a 100644 --- a/test/fixtures/test_grammars/precedence_on_token/grammar.json +++ b/test/fixtures/test_grammars/precedence_on_token/grammar.json @@ -3,7 +3,7 @@ "extras": [ {"type": "SYMBOL", "name": "comment"}, - {"type": "PATTERN", "value": "\\s"}, + {"type": "PATTERN", "value": "\\s"} ], "rules": { From 1468b349b59667171f0776c1bdf2689e0ffc1a68 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 17:39:16 -0800 Subject: [PATCH 131/208] Ensure 'src' directory exists before writing src/parser.c --- cli/src/generate/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 5d89bbfe..283ab0b2 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -36,6 +36,7 @@ pub fn generate_parser_in_directory( let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js")); let c_code = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; + fs::create_dir_all("src")?; fs::write(repo_path.join("src").join("parser.c"), c_code)?; } properties::generate_property_sheets(repo_path)?; From c76a155174c841a9f85b5a73682c5b090af739d1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 17:43:27 -0800 Subject: [PATCH 132/208] Fix escaping of characters in C strings --- cli/src/generate/render.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 55bfbfa2..a3e20536 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -931,10 +931,14 @@ impl Generator { fn sanitize_string(&self, name: &str) -> String { let 
mut result = String::with_capacity(name.len()); for c in name.chars() { - if ['\\', '\n', '\r', '\"'].contains(&c) { - result.push('\\'); + match c { + '\"' => result += "\\\"", + '\\' => result += "\\\\", + '\t' => result += "'\\t'", + '\n' => result += "'\\n'", + '\r' => result += "'\\r'", + _ => result.push(c), } - result.push(c); } result } From 6f242fda0c25cc9271478b13440ae39e89d928ca Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Jan 2019 17:43:42 -0800 Subject: [PATCH 133/208] Fix edge case in flatten rule --- .../generate/prepare_grammar/flatten_grammar.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index abd06769..204ceb07 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -31,13 +31,15 @@ impl RuleFlattener { self.production } - fn apply(&mut self, rule: Rule, at_end: bool) { + fn apply(&mut self, rule: Rule, at_end: bool) -> bool { match rule { Rule::Seq(members) => { + let mut result = false; let last_index = members.len() - 1; for (i, member) in members.into_iter().enumerate() { - self.apply(member, i == last_index && at_end); + result |= self.apply(member, i == last_index && at_end); } + result } Rule::Metadata { rule, params } => { let mut has_precedence = false; @@ -62,11 +64,11 @@ impl RuleFlattener { self.production.dynamic_precedence = params.dynamic_precedence; } - self.apply(*rule, at_end); + let did_push = self.apply(*rule, at_end); if has_precedence { self.precedence_stack.pop(); - if !at_end { + if did_push && !at_end { self.production.steps.last_mut().unwrap().precedence = self.precedence_stack.last().cloned().unwrap_or(0); } @@ -74,7 +76,7 @@ impl RuleFlattener { if has_associativity { self.associativity_stack.pop(); - if !at_end { + if did_push && !at_end { 
self.production.steps.last_mut().unwrap().associativity = self.associativity_stack.last().cloned(); } @@ -83,6 +85,8 @@ impl RuleFlattener { if has_alias { self.alias_stack.pop(); } + + did_push } Rule::Symbol(symbol) => { self.production.steps.push(ProductionStep { @@ -91,8 +95,9 @@ impl RuleFlattener { associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), }); + true } - _ => (), + _ => false, } } } From 545e840a082b8873ed2a85f5bb98196a06e419de Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 12 Jan 2019 21:42:31 -0800 Subject: [PATCH 134/208] Remove stray single quotes in symbol name strings --- cli/src/generate/render.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index a3e20536..c164ee1b 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -934,9 +934,9 @@ impl Generator { match c { '\"' => result += "\\\"", '\\' => result += "\\\\", - '\t' => result += "'\\t'", - '\n' => result += "'\\n'", - '\r' => result += "'\\r'", + '\t' => result += "\\t", + '\n' => result += "\\n", + '\r' => result += "\\r", _ => result.push(c), } } From 2e009f7177b5b409399c5de80ed330717a0b5522 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 12 Jan 2019 21:57:34 -0800 Subject: [PATCH 135/208] Avoid writing empty initializer list for alias sequences --- cli/src/generate/render.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index c164ee1b..9b09c6ad 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -66,7 +66,10 @@ impl Generator { self.add_symbol_enum(); self.add_symbol_names_list(); self.add_symbol_metadata_list(); - self.add_alias_sequences(); + + if self.parse_table.alias_sequences.len() > 1 { + self.add_alias_sequences(); + } let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut 
self.main_lex_table); @@ -750,10 +753,13 @@ impl Generator { add_line!(self, ".parse_actions = ts_parse_actions,"); add_line!(self, ".lex_modes = ts_lex_modes,"); add_line!(self, ".symbol_names = ts_symbol_names,"); - add_line!( - self, - ".alias_sequences = (const TSSymbol *)ts_alias_sequences," - ); + + if self.parse_table.alias_sequences.len() > 1 { + add_line!( + self, + ".alias_sequences = (const TSSymbol *)ts_alias_sequences," + ); + } add_line!( self, From e2717a6ad14c6d1db056b55e89526b70eeb48a83 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Jan 2019 14:05:19 -0800 Subject: [PATCH 136/208] Preprocess regexes to allow non-standard escape sequences Also allow unescaped curly braces to match literal curly braces when they don't form a valid repetition operator. --- cli/src/generate/dsl.js | 5 -- .../generate/prepare_grammar/expand_tokens.rs | 65 +++++++++++++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index ba3962cd..fa60dfa7 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -1,5 +1,4 @@ const UNICODE_ESCAPE_PATTERN = /\\u([0-9a-f]{4})/gi; -const DELIMITER_ESCAPE_PATTERN = /\\\//g; function alias(rule, value) { const result = { @@ -150,10 +149,6 @@ function normalize(value) { return { type: 'PATTERN', value: value.source - .replace( - DELIMITER_ESCAPE_PATTERN, - '/' - ) .replace( UNICODE_ESCAPE_PATTERN, (match, group) => String.fromCharCode(parseInt(group, 16)) diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index e269df6d..1e2ef2e5 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -6,8 +6,15 @@ use crate::generate::rules::Rule; use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, }; +use regex::Regex; use std::i32; +lazy_static! 
{ + static ref CURLY_BRACE_REGEX: Regex = Regex::new(r#"(^|[^\\])\{([^}]*[^0-9}][^}]*)\}"#).unwrap(); +} + +const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; + struct NfaBuilder { nfa: Nfa, is_sep: bool, @@ -35,6 +42,31 @@ fn get_completion_precedence(rule: &Rule) -> i32 { } } +fn preprocess_regex(content: &str) -> String { + let content = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}"); + let mut result = String::with_capacity(content.len()); + let mut is_escaped = false; + for c in content.chars() { + if is_escaped { + if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) { + result.push(c); + } else { + result.push('\\'); + result.push(c); + } + is_escaped = false; + } else if c == '\\' { + is_escaped = true; + } else { + result.push(c); + } + } + if is_escaped { + result.push('\\'); + } + result +} + pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { let mut builder = NfaBuilder { nfa: Nfa::new(), @@ -90,6 +122,7 @@ impl NfaBuilder { fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result { match rule { Rule::Pattern(s) => { + let s = preprocess_regex(s); let ast = parse::Parser::new() .parse(&s) .map_err(|e| Error(e.to_string()))?; @@ -586,6 +619,38 @@ mod tests { ("12e34", Some((0, "12e34"))), ], }, + // Allowing unrecognized escape sequences + Row { + rules: vec![ + // Escaped forward slash (used in JS because '/' is the regex delimiter) + Rule::pattern(r#"\/"#), + // Escaped quotes + Rule::pattern(r#"\"\'"#), + // Quote preceded by a literal backslash + Rule::pattern(r#"[\\']+"#), + ], + separators: vec![], + examples: vec![ + ("/", Some((0, "/"))), + ("\"\'", Some((1, "\"\'"))), + (r#"'\'a"#, Some((2, r#"'\'"#))), + ], + }, + // Allowing un-escaped curly braces + Row { + rules: vec![ + // Un-escaped curly braces + Rule::pattern(r#"u{[0-9a-fA-F]+}"#), + // Already-escaped curly braces + Rule::pattern(r#"\{[ab]{3}\}"#), + ], + separators: vec![], + examples: vec![ + ("u{1234} ok", Some((0, 
"u{1234}"))), + ("{aba}}", Some((1, "{aba}"))), + ], + + } ]; for Row { From 8f48240bf1e7d654e127ed1147df59be93c07db0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Jan 2019 14:06:22 -0800 Subject: [PATCH 137/208] Allow building the C code with static analysis --- lib/build.rs | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/lib/build.rs b/lib/build.rs index 7e8714ef..e4d1f91a 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -5,16 +5,7 @@ use std::path::{Path, PathBuf}; use std::fs; fn main() { - let mut config = cc::Build::new(); - config - .define("UTF8PROC_STATIC", "") - .flag_if_supported("-std=c99") - .flag_if_supported("-Wno-unused-parameter") - .include("include") - .include("utf8proc") - .file(Path::new("src").join("lib.c")) - .compile("tree-sitter"); - + println!("cargo:rerun-if-env-changed=TREE_SITTER_TEST"); if env::var("TREE_SITTER_TEST").is_ok() { let mut parser_config = cc::Build::new(); parser_config @@ -65,4 +56,34 @@ fn main() { scanner_c_config.compile("fixture-scanners-c"); scanner_cxx_config.compile("fixture-scanners-cxx"); } + + println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS"); + if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() { + let clang_path = which("clang").unwrap(); + let clang_path = clang_path.to_str().unwrap(); + env::set_var("CC", &format!("scan-build -analyze-headers --use-analyzer={} cc", clang_path)); + } + + let mut config = cc::Build::new(); + config + .define("UTF8PROC_STATIC", "") + .flag_if_supported("-std=c99") + .flag_if_supported("-Wno-unused-parameter") + .include("include") + .include("utf8proc") + .file(Path::new("src").join("lib.c")) + .compile("tree-sitter"); +} + +fn which(exe_name: impl AsRef) -> Option { + env::var_os("PATH").and_then(|paths| { + env::split_paths(&paths).find_map(|dir| { + let full_path = dir.join(&exe_name); + if full_path.is_file() { + Some(full_path) + } else { + None + } + }) + }) } From 
def5884b59495fbe3ff199f199eee58731f5398e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Jan 2019 14:07:42 -0800 Subject: [PATCH 138/208] Allow passing grammar JS or JSON path to `generate` command --- cli/src/generate/mod.rs | 12 +++++++++++- cli/src/generate/properties.rs | 22 ++++++++++++---------- cli/src/main.rs | 3 +++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 283ab0b2..1593c0da 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -28,12 +28,14 @@ lazy_static! { pub fn generate_parser_in_directory( repo_path: &PathBuf, + grammar_path: Option<&str>, minimize: bool, state_ids_to_log: Vec, properties_only: bool, ) -> Result<()> { if !properties_only { - let grammar_json = load_js_grammar_file(&repo_path.join("grammar.js")); + let grammar_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); + let grammar_json = load_grammar_file(&grammar_path); let c_code = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; fs::create_dir_all("src")?; @@ -77,6 +79,14 @@ fn generate_parser_for_grammar_with_opts( )) } +fn load_grammar_file(grammar_path: &PathBuf) -> String { + match grammar_path.extension().and_then(|e| e.to_str()) { + Some("js") => load_js_grammar_file(grammar_path), + Some("json") => fs::read_to_string(grammar_path).expect("Failed to read grammar file"), + _ => panic!("Unknown grammar file extension"), + } +} + fn load_js_grammar_file(grammar_path: &PathBuf) -> String { let mut node_process = Command::new("node") .stdin(Stdio::piped()) diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index cca7fef8..e1492d6f 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -424,16 +424,18 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { let src_dir_path = repo_path.join("src"); let properties_dir_path = repo_path.join("properties"); - for entry 
in fs::read_dir(properties_dir_path)? { - let css_path = entry?.path(); - let css = fs::read_to_string(&css_path)?; - let sheet = generate_property_sheet(&css_path, &css)?; - let property_sheet_json_path = src_dir_path - .join(css_path.file_name().unwrap()) - .with_extension("json"); - let property_sheet_json_file = File::create(property_sheet_json_path)?; - let mut writer = BufWriter::new(property_sheet_json_file); - serde_json::to_writer_pretty(&mut writer, &sheet)?; + if let Ok(entries) = fs::read_dir(properties_dir_path) { + for entry in entries { + let css_path = entry?.path(); + let css = fs::read_to_string(&css_path)?; + let sheet = generate_property_sheet(&css_path, &css)?; + let property_sheet_json_path = src_dir_path + .join(css_path.file_name().unwrap()) + .with_extension("json"); + let property_sheet_json_file = File::create(property_sheet_json_path)?; + let mut writer = BufWriter::new(property_sheet_json_file); + serde_json::to_writer_pretty(&mut writer, &sheet)?; + } } Ok(()) diff --git a/cli/src/main.rs b/cli/src/main.rs index 5a830458..80a40758 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -43,6 +43,7 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("generate") .about("Generate a parser") + .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) .arg(Arg::with_name("properties-only").long("properties")) .arg( @@ -84,6 +85,7 @@ fn run() -> error::Result<()> { logger::init(); } + let grammar_path = matches.value_of("grammar-path"); let minimize = !matches.is_present("no-minimize"); let properties_only = matches.is_present("properties-only"); let state_ids_to_log = matches @@ -94,6 +96,7 @@ fn run() -> error::Result<()> { }); generate::generate_parser_in_directory( ¤t_dir, + grammar_path, minimize, state_ids_to_log, properties_only, From 19b2addcc42f89e17cb34ecef906c29203dacb9e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Jan 2019 14:08:07 -0800 Subject: [PATCH 139/208] Fix bug in 
symbol enum code generation --- cli/src/generate/render.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 9b09c6ad..1da7f99d 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -215,8 +215,8 @@ impl Generator { for (alias, symbol) in &self.alias_map { if symbol.is_none() { add_line!(self, "{} = {},", self.alias_ids[&alias], i); + i += 1; } - i += 1; } dedent!(self); add_line!(self, "}};"); From b1fa49448d3c18e2d44b5e35e59795e0e7aa9078 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Jan 2019 14:39:01 -0800 Subject: [PATCH 140/208] Regenerate parsers on CI --- .appveyor.yml | 17 ++++++++++------- .travis.yml | 15 +++++++++++---- script/format | 7 ------- script/lint | 14 -------------- script/regenerate-fixtures | 27 +++++++++++++++++++++++++++ script/regenerate-fixtures.cmd | 22 ++++++++++++++++++++++ 6 files changed, 70 insertions(+), 32 deletions(-) delete mode 100755 script/format delete mode 100755 script/lint create mode 100755 script/regenerate-fixtures create mode 100644 script/regenerate-fixtures.cmd diff --git a/.appveyor.yml b/.appveyor.yml index 147827e9..f46e34e6 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,9 +1,4 @@ -environment: - RUST_BACKTRACE: full - TREE_SITTER_TEST: true - build: false - install: # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe @@ -14,10 +9,18 @@ install: # Install dependencies - git submodule update --init - - script\fetch-fixtures.cmd + +environment: + RUST_BACKTRACE: full test_script: - - cargo build + # Fetch and regenerate the fixture parsers + - script\fetch-fixtures.cmd + - cargo build --release + - script\regenerate-fixtures.cmd + + # Run tests + - set TREE_SITTER_TEST=1 - cargo test branches: diff --git a/.travis.yml b/.travis.yml index 0d4f8cd5..c281ef95 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,20 @@ language: rust - rust: - stable env: - - 
TREE_SITTER_TEST=1 RUST_BACKTRACE=full + - RUST_BACKTRACE=full -before_install: - - ./script/fetch-fixtures +script: + # Fetch and regenerate the fixture parsers + - script/fetch-fixtures + - cargo build --release + - script/regenerate-fixtures + + # Run tests + - export TREE_SITTER_TEST=1 + - export TREE_SITTER_STATIC_ANALYSIS=1 + - cargo test branches: only: diff --git a/script/format b/script/format deleted file mode 100755 index 1aa8fbee..00000000 --- a/script/format +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -DIRS="${*:-src include}" - -find $DIRS \ - -name '*.c' -or -name '*.cc' -or -name '*.h' | \ - xargs clang-format -i -style=file diff --git a/script/lint b/script/lint deleted file mode 100755 index 3d6a03dc..00000000 --- a/script/lint +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -CPPLINT=externals/cpplint.py -CPPLINT_URL=http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py - -if [[ ! -f $CPPLINT ]]; then - curl $CPPLINT_URL > $CPPLINT - chmod +x $CPPLINT -fi - -FILTERS='--filter=-legal/copyright,-readability/todo,-build/c++11' - -$CPPLINT --linelength=90 --root=include $FILTERS include/tree_sitter/compiler.h 2>&1 -$CPPLINT --linelength=90 --root=src $FILTERS $(find src/compiler -type f) 2>&1 diff --git a/script/regenerate-fixtures b/script/regenerate-fixtures new file mode 100755 index 00000000..15e3c09d --- /dev/null +++ b/script/regenerate-fixtures @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +root_dir=$PWD +tree_sitter=${root_dir}/target/release/tree-sitter +grammars_dir=${root_dir}/test/fixtures/grammars + +grammar_names=( + bash + c + cpp + embedded-template + go + html + javascript + json + python + rust +) + +for grammar_name in "${grammar_names[@]}"; do + echo "Regenerating ${grammar_name} parser" + cd ${grammars_dir}/${grammar_name} + $tree_sitter generate src/grammar.json + cd $PWD +done diff --git a/script/regenerate-fixtures.cmd b/script/regenerate-fixtures.cmd new file mode 100644 index 
00000000..739bdba1 --- /dev/null +++ b/script/regenerate-fixtures.cmd @@ -0,0 +1,22 @@ +@echo off + +call:regenerate bash +call:regenerate c +call:regenerate cpp +call:regenerate embedded-template +call:regenerate go +call:regenerate html +call:regenerate javascript +call:regenerate json +call:regenerate python +call:regenerate rust +EXIT /B 0 + +:regenerate +SETLOCAL +SET tree_sitter=%cd%\target\release\tree-sitter +SET grammar_dir=test\fixtures\grammars\%~1 +pushd %grammar_dir% +%tree_sitter% generate src\grammar.json +popd +EXIT /B 0 From 5c3c1dd0bd28fec0ab20d77c288f1b66b9b90a0f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Jan 2019 17:19:46 -0800 Subject: [PATCH 141/208] Get logging flags working properly with test script --- .appveyor.yml | 5 +- .travis.yml | 6 +- cli/src/parse.rs | 9 +- cli/src/test.rs | 10 +- cli/src/tests/corpuses.rs | 95 +++++++++--- cli/src/tests/languages.rs | 2 + cli/src/tests/parser_api.rs | 4 - cli/src/util.rs | 55 ++++--- lib/binding/lib.rs | 2 +- script/clean | 7 - script/configure | 7 - script/configure.cmd | 3 - script/{bindgen.sh => generate-bindings} | 0 script/test | 139 +++--------------- script/test.cmd | 10 +- script/test.sh | 3 - script/trim-whitespace | 3 - .../corpus.txt | 1 + .../grammar.json | 4 +- 19 files changed, 140 insertions(+), 225 deletions(-) delete mode 100755 script/clean delete mode 100755 script/configure delete mode 100644 script/configure.cmd rename script/{bindgen.sh => generate-bindings} (100%) delete mode 100755 script/test.sh delete mode 100755 script/trim-whitespace diff --git a/.appveyor.yml b/.appveyor.yml index f46e34e6..29193a53 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -10,9 +10,6 @@ install: # Install dependencies - git submodule update --init -environment: - RUST_BACKTRACE: full - test_script: # Fetch and regenerate the fixture parsers - script\fetch-fixtures.cmd @@ -21,7 +18,7 @@ test_script: # Run tests - set TREE_SITTER_TEST=1 - - cargo test + - script\test.cmd 
branches: only: diff --git a/.travis.yml b/.travis.yml index c281ef95..5f981ce9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,6 @@ language: rust rust: - stable -env: - - RUST_BACKTRACE=full - script: # Fetch and regenerate the fixture parsers - script/fetch-fixtures @@ -12,9 +9,8 @@ script: - script/regenerate-fixtures # Run tests - - export TREE_SITTER_TEST=1 - export TREE_SITTER_STATIC_ANALYSIS=1 - - cargo test + - script/test branches: only: diff --git a/cli/src/parse.rs b/cli/src/parse.rs index fde148b6..38b6a61c 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -11,14 +11,13 @@ pub fn parse_file_at_path( debug: bool, debug_graph: bool, ) -> Result<()> { + let mut log_session = None; let mut parser = Parser::new(); parser.set_language(language)?; let source_code = fs::read_to_string(path)?; - let mut log_session = None; - if debug_graph { - log_session = Some(util::start_logging_graphs(&mut parser, "log.html")?); + log_session = Some(util::log_graphs(&mut parser, "log.html")?); } else if debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { @@ -32,9 +31,7 @@ pub fn parse_file_at_path( .parse_str(&source_code, None) .expect("Incompatible language version"); - if let Some(log_session) = log_session { - util::stop_logging_graphs(&mut parser, log_session)?; - } + drop(log_session); let stdout = io::stdout(); let mut stdout = stdout.lock(); diff --git a/cli/src/test.rs b/cli/src/test.rs index e064dffd..bcea3dcc 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -44,13 +44,12 @@ pub fn run_tests_at_path( filter: Option<&str>, ) -> Result<()> { let test_entry = parse_tests(path)?; + let mut log_session = None; let mut parser = Parser::new(); parser.set_language(language)?; - let mut log_session = None; - if debug_graph { - log_session = Some(util::start_logging_graphs(&mut parser, "log.html")?); + log_session = Some(util::log_graphs(&mut parser, "log.html")?); } else if debug { 
parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { @@ -103,10 +102,7 @@ pub fn run_tests_at_path( } } - if let Some(log_session) = log_session { - util::stop_logging_graphs(&mut parser, log_session)?; - } - + drop(log_session); Ok(()) } diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index eeea113c..e1fe9189 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -2,12 +2,14 @@ use super::languages; use crate::generate; use crate::loader::Loader; use crate::test::{parse_tests, TestEntry}; +use crate::util; use std::fs; use std::path::PathBuf; -use tree_sitter::{Language, Parser}; +use tree_sitter::{Language, Parser, LogType}; lazy_static! { - static ref LANGUAGES: [(&'static str, Language); 6] = [ + static ref LANGUAGES: [(&'static str, Language); 7] = [ + ("bash", languages::bash()), ("c", languages::c()), ("cpp", languages::cpp()), ("embedded-template", languages::embedded_template()), @@ -20,45 +22,87 @@ lazy_static! 
{ static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch"); static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); static ref EXEC_PATH: PathBuf = std::env::current_exe().unwrap(); + static ref LANGUAGE_FILTER: Option = + std::env::var("TREE_SITTER_TEST_LANGUAGE_FILTER").ok(); + static ref EXAMPLE_FILTER: Option = + std::env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); + static ref LOG_ENABLED: bool = std::env::var("TREE_SITTER_ENABLE_LOG").is_ok(); + static ref LOG_GRAPH_ENABLED: bool = std::env::var("TREE_SITTER_ENABLE_LOG_GRAPHS").is_ok(); } #[test] fn test_real_language_corpus_files() { + let mut log_session = None; let mut parser = Parser::new(); let grammars_dir = FIXTURES_DIR.join("grammars"); - for (name, language) in LANGUAGES.iter().cloned() { - let corpus_dir = grammars_dir.join(name).join("corpus"); + if *LOG_ENABLED { + parser.set_logger(Some(Box::new(|log_type, msg| { + if log_type == LogType::Lex { + eprintln!(" {}", msg); + } else { + eprintln!("{}", msg); + } + }))); + } else if *LOG_GRAPH_ENABLED { + log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); + } + + for (language_name, language) in LANGUAGES.iter().cloned() { + if let Some(filter) = LANGUAGE_FILTER.as_ref() { + if !language_name.contains(filter.as_str()) { + continue; + } + } + + eprintln!("language: {:?}", language_name); + + let corpus_dir = grammars_dir.join(language_name).join("corpus"); let test = parse_tests(&corpus_dir).unwrap(); parser.set_language(language).unwrap(); run_mutation_tests(&mut parser, test); } + + drop(parser); + drop(log_session); } #[test] fn test_feature_corpus_files() { fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap(); - let filter = std::env::var("TREE_SITTER_TEST_FILTER").ok(); - let mut loader = Loader::new(SCRATCH_DIR.clone()); + let loader = Loader::new(SCRATCH_DIR.clone()); + let mut log_session = None; let mut parser = Parser::new(); let test_grammars_dir = FIXTURES_DIR.join("test_grammars"); + 
if *LOG_ENABLED { + parser.set_logger(Some(Box::new(|log_type, msg| { + if log_type == LogType::Lex { + eprintln!(" {}", msg); + } else { + eprintln!("{}", msg); + } + }))); + } else if *LOG_GRAPH_ENABLED { + log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); + } + for entry in fs::read_dir(&test_grammars_dir).unwrap() { let entry = entry.unwrap(); if !entry.metadata().unwrap().is_dir() { continue; } - let test_name = entry.file_name(); - let test_name = test_name.to_str().unwrap(); + let language_name = entry.file_name(); + let language_name = language_name.to_str().unwrap(); - if let Some(filter) = filter.as_ref() { - if !test_name.contains(filter.as_str()) { + if let Some(filter) = LANGUAGE_FILTER.as_ref() { + if !language_name.contains(filter.as_str()) { continue; } } - eprintln!("test: {:?}", test_name); + eprintln!("test language: {:?}", language_name); let test_path = entry.path(); let grammar_path = test_path.join("grammar.json"); @@ -78,13 +122,13 @@ fn test_feature_corpus_files() { } else { panic!( "Expected error message but got none for test grammar '{}'", - test_name + language_name ); } } else { let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap(); - let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", test_name)); + let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", language_name)); if !fs::read_to_string(&parser_c_path) .map(|content| content == c_code) .unwrap_or(false) @@ -98,18 +142,21 @@ fn test_feature_corpus_files() { None }; let language = loader - .load_language_from_sources(test_name, &HEADER_DIR, &parser_c_path, &scanner_path) + .load_language_from_sources( + language_name, + &HEADER_DIR, + &parser_c_path, + &scanner_path, + ) .unwrap(); let test = parse_tests(&corpus_path).unwrap(); + parser.set_language(language).unwrap(); + run_mutation_tests(&mut parser, test); } } - // for (name, language) in LANGUAGES.iter().cloned() { - // let corpus_dir = 
grammars_dir.join(name).join("corpus"); - // let test = parse_tests(&corpus_dir).unwrap(); - // parser.set_language(language).unwrap(); - // run_mutation_tests(&mut parser, test); - // } + drop(parser); + drop(log_session); } fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { @@ -119,6 +166,14 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { input, output, } => { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !name.contains(filter.as_str()) { + return; + } + } + + eprintln!(" example: {:?}", name); + let tree = parser .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) .unwrap(); diff --git a/cli/src/tests/languages.rs b/cli/src/tests/languages.rs index 0c483d08..e093d218 100644 --- a/cli/src/tests/languages.rs +++ b/cli/src/tests/languages.rs @@ -1,6 +1,7 @@ use tree_sitter::Language; extern "C" { + fn tree_sitter_bash() -> Language; fn tree_sitter_c() -> Language; fn tree_sitter_cpp() -> Language; fn tree_sitter_embedded_template() -> Language; @@ -10,6 +11,7 @@ extern "C" { fn tree_sitter_rust() -> Language; } +pub fn bash() -> Language { unsafe { tree_sitter_bash() } } pub fn c() -> Language { unsafe { tree_sitter_c() } } pub fn cpp() -> Language { unsafe { tree_sitter_cpp() } } pub fn embedded_template() -> Language { unsafe { tree_sitter_embedded_template() } } diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api.rs index e32c292b..d717bfab 100644 --- a/cli/src/tests/parser_api.rs +++ b/cli/src/tests/parser_api.rs @@ -324,10 +324,6 @@ fn test_custom_utf16_input() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); - parser.set_logger(Some(Box::new(|t, message| { - println!("log: {:?} {}", t, message); - }))); - let lines: Vec> = ["pub fn foo() {", " 1", "}"] .iter() .map(|s| s.encode_utf16().collect()) diff --git a/cli/src/util.rs b/cli/src/util.rs index f36cbe79..5c1bc39c 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,29 +1,28 @@ -use std::fs::File; -use 
std::io::{Result, Write}; +#[cfg(unix)] +use std::path::PathBuf; +#[cfg(unix)] use std::process::{Child, ChildStdin, Command, Stdio}; -use std::str; use tree_sitter::Parser; +const HTML_HEADER: &[u8] = b"\n\n\n"; + #[cfg(windows)] pub(crate) struct LogSession(); +#[cfg(unix)] +pub(crate) struct LogSession(PathBuf, Option, Option); + #[cfg(windows)] -pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result { +pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { Ok(LogSession()) } -#[cfg(windows)] -pub(crate) fn stop_logging_graphs(parser: &mut Parser, mut session: LogSession) -> Result<()> { - Ok(()) -} - #[cfg(unix)] -pub(crate) struct LogSession(Child, ChildStdin); +pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { + use std::io::Write; -#[cfg(unix)] -pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result { - let mut dot_file = File::create(path)?; - dot_file.write(b"\n\n\n")?; + let mut dot_file = std::fs::File::create(path)?; + dot_file.write(HTML_HEADER)?; let mut dot_process = Command::new("dot") .arg("-Tsvg") .stdin(Stdio::piped()) @@ -34,25 +33,23 @@ pub(crate) fn start_logging_graphs(parser: &mut Parser, path: &str) -> Result Result<()> { - drop(session.1); +impl Drop for LogSession { + fn drop(&mut self) { + use std::fs; - if cfg!(unix) { - parser.stop_printing_dot_graphs(); + drop(self.2.take().unwrap()); + let output = self.1.take().unwrap().wait_with_output().unwrap(); + if output.status.success() { + if cfg!(target_os = "macos") && fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64 { + Command::new("open").arg("log.html").output().unwrap(); + } + } else { + eprintln!("Dot failed: {} {}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr)); + } } - - session.0.wait()?; - - if cfg!(target_os = "macos") { - Command::new("open").arg("log.html").output()?; - } - - Ok(()) } diff --git a/lib/binding/lib.rs 
b/lib/binding/lib.rs index 08f863f8..fdb243ec 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -220,7 +220,6 @@ impl Parser { unsafe { ffi::ts_parser_print_dot_graphs(self.0, ffi::dup(fd)) } } - #[cfg(unix)] pub fn stop_printing_dot_graphs(&mut self) { unsafe { ffi::ts_parser_print_dot_graphs(self.0, -1) } } @@ -391,6 +390,7 @@ impl Parser { impl Drop for Parser { fn drop(&mut self) { + self.stop_printing_dot_graphs(); self.set_logger(None); unsafe { ffi::ts_parser_delete(self.0) } } diff --git a/script/clean b/script/clean deleted file mode 100755 index dfa8ff78..00000000 --- a/script/clean +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -rm -rf \ - build out \ - gyp-mac-tool \ - Makefile *.Makefile *.target.mk \ - *.xcodeproj diff --git a/script/configure b/script/configure deleted file mode 100755 index f2e511a1..00000000 --- a/script/configure +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -set -e - -git submodule update --init --recursive -externals/gyp/gyp project.gyp --depth . --format=make $@ -externals/gyp/gyp tests.gyp --depth . --format=make $@ diff --git a/script/configure.cmd b/script/configure.cmd deleted file mode 100644 index dc73e8de..00000000 --- a/script/configure.cmd +++ /dev/null @@ -1,3 +0,0 @@ -git submodule update --init --recursive -call .\externals\gyp\gyp.bat project.gyp --depth . -call .\externals\gyp\gyp.bat tests.gyp --depth . 
diff --git a/script/bindgen.sh b/script/generate-bindings similarity index 100% rename from script/bindgen.sh rename to script/generate-bindings diff --git a/script/test b/script/test index eb394962..43c274f7 100755 --- a/script/test +++ b/script/test @@ -12,150 +12,53 @@ OPTIONS -h print this message - -b run make under scan-build static analyzer + -a Compile C code with the Clang static analyzer - -d run tests in a debugger (either lldb or gdb) + -l run only the corpus tests for the given language - -g run tests with valgrind's memcheck tool - - -G run tests with valgrind's memcheck tool, including a full leak check - - -v run tests with verbose output - - -f run only tests whose description contain the given string + -e run only the corpus tests whose name contain the given string -s set the seed used to control random behavior + -d print parsing log to stderr + -D pipe tests' stderr to \`dot(1)\` to render an SVG log EOF } -profile= -leak_check=no -mode=normal -verbose= -args=() -target=tests -export BUILDTYPE=Test -cmd="out/${BUILDTYPE}/${target}" -run_scan_build= +export TREE_SITTER_TEST=1 +export RUST_TEST_THREADS=1 +export RUST_BACKTRACE=full -if [ "$(uname -s)" == "Darwin" ]; then - export LINK="clang++ -fsanitize=address" -fi - -while getopts "bdf:s:gGhpvD" option; do +while getopts "bdl:e:s:gGhpvD" option; do case ${option} in h) usage exit ;; - d) - mode=debug + l) + export TREE_SITTER_TEST_LANGUAGE_FILTER=${OPTARG} ;; - g) - mode=valgrind - ;; - G) - mode=valgrind - leak_check=full - ;; - p) - profile=true - ;; - f) - args+=("--only=${OPTARG}") - ;; - v) - verbose=true + e) + export TREE_SITTER_TEST_EXAMPLE_FILTER=${OPTARG} ;; s) export TREE_SITTER_SEED=${OPTARG} ;; - D) - export TREE_SITTER_ENABLE_DEBUG_GRAPHS=1 - mode=SVG + d) + export TREE_SITTER_ENABLE_LOG=1 ;; - b) - run_scan_build=true + D) + export TREE_SITTER_ENABLE_LOG_GRAPHS=1 ;; esac done -if [[ -n $verbose ]]; then - args+=("--reporter=spec") +if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || 
-n $TREE_SITTER_TEST_EXAMPLE_FILTER ]]; then + top_level_filter=corpus else - args+=("--reporter=singleline") + top_level_filter=$1 fi -if [[ -n "$run_scan_build" ]]; then - . script/util/scan-build.sh - scan_build make -j2 $target -else - make -j2 $target -fi -args=${args:-""} - -if [[ -n $profile ]]; then - export CPUPROFILE=/tmp/${target}-$(date '+%s').prof -fi - -case ${mode} in - valgrind) - valgrind \ - --suppressions=./script/util/valgrind.supp \ - --dsymutil=yes \ - --leak-check=${leak_check} \ - $cmd "${args[@]}" 2>&1 | \ - grep --color -E '\w+_tests?.cc:\d+|$' - ;; - - debug) - if hash lldb &> /dev/null; then - lldb $cmd -- "${args[@]}" - elif hash gdb &> /dev/null; then - gdb $cmd -- "${args[@]}" - else - echo "No debugger found" - exit 1 - fi - ;; - - SVG) - html_file=log.html - dot_file=$html_file.dot - - function write_log_file { - echo "" > $html_file - line_count=$(grep -n '^$' $dot_file | tail -1 | cut -f1 -d:) - if [[ -n $line_count ]]; then - head -n $line_count $dot_file | dot -Tsvg >> $html_file - else - cat $dot_file | grep -v 'Assertion' | dot -Tsvg >> $html_file - fi - rm $dot_file - echo "Wrote $html_file - $line_count" - } - - function handle_sigint { - trap '' SIGINT - echo - write_log_file - exit 0 - } - trap handle_sigint SIGINT - - $cmd "${args[@]}" 2> $dot_file || export status=$? 
- write_log_file - exit $status - ;; - - normal) - time $cmd "${args[@]}" - ;; -esac - -if [[ -n $profile ]]; then - pprof $cmd $CPUPROFILE -fi +cargo test --jobs 1 $top_level_filter -- --nocapture diff --git a/script/test.cmd b/script/test.cmd index f2d97303..e62eed0e 100644 --- a/script/test.cmd +++ b/script/test.cmd @@ -1,9 +1,7 @@ @echo off -msbuild /p:Configuration=Test tests.vcxproj -set only_arg= -IF not "%~1"=="" ( - set only_arg=--only=%1 -) +set TREE_SITTER_TEST=1 +set RUST_TEST_THREADS=1 +set RUST_BACKTRACE=full -.\test\tests.exe --reporter=singleline --no-color %only_arg% +cargo test "%~1" diff --git a/script/test.sh b/script/test.sh deleted file mode 100755 index eb6183c0..00000000 --- a/script/test.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -RUST_TREE_SITTER_TEST=1 cargo test $@ diff --git a/script/trim-whitespace b/script/trim-whitespace deleted file mode 100755 index b67791f5..00000000 --- a/script/trim-whitespace +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -find src test include -type f | xargs perl -pi -e 's/ +$//' diff --git a/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt b/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt index 06a7bf0b..749264c6 100644 --- a/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt +++ b/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt @@ -19,6 +19,7 @@ anonymous tokens defined with LF escape sequence anonymous tokens defined with CR escape sequence ================================================= + --- (first_rule) diff --git a/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json b/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json index d2613776..38ada64c 100644 --- a/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json +++ b/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json @@ -5,10 +5,10 @@ "type": 
"CHOICE", "members": [ {"type": "STRING", "value": "\n"}, - {"type": "STRING", "value": "\r"}, + {"type": "STRING", "value": "\r\n"}, {"type": "STRING", "value": "'hello'"}, {"type": "PATTERN", "value": "\\d+"} ] } } -} \ No newline at end of file +} From a8292f4fe99d87dfee886e146307da0a8beb2a9c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 10:27:39 -0800 Subject: [PATCH 142/208] Load all fixture grammars dynamically This way the build doesn't take forever any time a single grammar has been regenerated. --- cli/src/generate/nfa.rs | 5 +-- cli/src/generate/properties.rs | 3 +- cli/src/loader.rs | 49 ++++++++++++++++++-------- cli/src/tests/corpuses.rs | 64 ++++++++++------------------------ cli/src/tests/fixtures.rs | 51 +++++++++++++++++++++++++++ cli/src/tests/languages.rs | 21 ----------- cli/src/tests/mod.rs | 2 +- cli/src/tests/parser_api.rs | 8 +++-- lib/build.rs | 53 ---------------------------- script/regenerate-fixtures | 6 ++++ 10 files changed, 119 insertions(+), 143 deletions(-) create mode 100644 cli/src/tests/fixtures.rs delete mode 100644 cli/src/tests/languages.rs diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index 54e34814..674391ff 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -55,10 +55,6 @@ impl CharacterSet { CharacterSet::Include(Vec::new()) } - pub fn all() -> Self { - CharacterSet::Exclude(Vec::new()) - } - pub fn negate(self) -> CharacterSet { match self { CharacterSet::Include(chars) => CharacterSet::Exclude(chars), @@ -182,6 +178,7 @@ impl CharacterSet { } } + #[cfg(test)] pub fn contains(&self, c: char) -> bool { match self { CharacterSet::Include(chars) => chars.contains(&c), diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index e1492d6f..b16e698a 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -464,7 +464,8 @@ fn parse_property_sheet(path: &Path, css: &str) -> Result> { rsass::Item::AtRule { name, args, .. 
} => match name.as_str() { "schema" => { if let Some(s) = get_sass_string(args) { - let schema_path = resolve_path(path, s)?; + // TODO - use schema + let _schema_path = resolve_path(path, s)?; items.remove(i); continue; } else { diff --git a/cli/src/loader.rs b/cli/src/loader.rs index af1ab7be..83b878b9 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -23,11 +23,11 @@ struct LanguageRepo { } pub struct LanguageConfiguration { - name: String, - content_regex: Option, - first_line_regex: Option, + _name: String, + _content_regex: Option, + _first_line_regex: Option, file_types: Vec, - highlight_property_sheet: Option>, + _highlight_property_sheet: Option>, } pub struct Loader { @@ -108,16 +108,21 @@ impl Loader { let language = if let Some(language) = repo.language { language } else { - let language = self.load_language_at_path(&repo.name, &repo.path)?; + let src_path = repo.path.join("src"); + let language = self.load_language_at_path(&repo.name, &src_path, &src_path)?; self.language_repos[id].language = Some(language); language }; Ok((language, &self.language_repos[id].configurations)) } - fn load_language_at_path(&self, name: &str, language_path: &Path) -> io::Result { - let src_path = language_path.join("src"); - let parser_c_path = src_path.join("parser.c"); + pub fn load_language_at_path( + &self, + name: &str, + src_path: &Path, + header_path: &Path, + ) -> io::Result { + let parser_path = src_path.join("parser.c"); let scanner_path; let scanner_c_path = src_path.join("scanner.c"); @@ -132,7 +137,7 @@ impl Loader { } } - self.load_language_from_sources(name, &src_path, &parser_c_path, &scanner_path) + self.load_language_from_sources(name, &header_path, &parser_path, &scanner_path) } pub fn load_language_from_sources( @@ -148,6 +153,7 @@ impl Loader { if needs_recompile(&library_path, &parser_path, &scanner_path)? 
{ let mut config = cc::Build::new(); config + .cpp(true) .opt_level(2) .cargo_metadata(false) .target(env!("BUILD_TARGET")) @@ -197,13 +203,14 @@ impl Loader { "Parser compilation failed.\nStdout: {}\nStderr: {}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr) - ).as_str(), + ) + .as_str(), )); } } let library = Library::new(library_path)?; - let language_fn_name = format!("tree_sitter_{}", name); + let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name)); let language = unsafe { let language_fn: Symbol Language> = library.get(language_fn_name.as_bytes())?; @@ -248,15 +255,15 @@ impl Loader { configurations .into_iter() .map(|conf| LanguageConfiguration { - name: conf.name, + _name: conf.name, file_types: conf.file_types.unwrap_or(Vec::new()), - content_regex: conf + _content_regex: conf .content_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - first_line_regex: conf + _first_line_regex: conf .first_line_regex .and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()), - highlight_property_sheet: conf.highlights.map(|d| Err(d.into())), + _highlight_property_sheet: conf.highlights.map(|d| Err(d.into())), }) .collect() }); @@ -304,3 +311,15 @@ fn needs_recompile( fn mtime(path: &Path) -> io::Result { Ok(fs::metadata(path)?.modified()?) 
} + +fn replace_dashes_with_underscores(name: &str) -> String { + let mut result = String::with_capacity(name.len()); + for c in name.chars() { + if c == '-' { + result.push('_'); + } else { + result.push(c); + } + } + result +} diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index e1fe9189..e55c8f57 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -1,27 +1,21 @@ -use super::languages; +use super::fixtures::{get_language, get_test_language, fixtures_dir}; use crate::generate; -use crate::loader::Loader; use crate::test::{parse_tests, TestEntry}; use crate::util; use std::fs; -use std::path::PathBuf; -use tree_sitter::{Language, Parser, LogType}; +use tree_sitter::{LogType, Parser}; + +const LANGUAGES: &'static [&'static str] = &[ + "bash", + "c", + "cpp", + "embedded-template", + "go", + "html", + "javascript", +]; lazy_static! { - static ref LANGUAGES: [(&'static str, Language); 7] = [ - ("bash", languages::bash()), - ("c", languages::c()), - ("cpp", languages::cpp()), - ("embedded-template", languages::embedded_template()), - ("go", languages::go()), - ("html", languages::html()), - ("javascript", languages::javascript()), - ]; - static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect(); - static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); - static ref SCRATCH_DIR: PathBuf = ROOT_DIR.join("target").join("scratch"); - static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); - static ref EXEC_PATH: PathBuf = std::env::current_exe().unwrap(); static ref LANGUAGE_FILTER: Option = std::env::var("TREE_SITTER_TEST_LANGUAGE_FILTER").ok(); static ref EXAMPLE_FILTER: Option = @@ -34,7 +28,7 @@ lazy_static! 
{ fn test_real_language_corpus_files() { let mut log_session = None; let mut parser = Parser::new(); - let grammars_dir = FIXTURES_DIR.join("grammars"); + let grammars_dir = fixtures_dir().join("grammars"); if *LOG_ENABLED { parser.set_logger(Some(Box::new(|log_type, msg| { @@ -48,7 +42,7 @@ fn test_real_language_corpus_files() { log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); } - for (language_name, language) in LANGUAGES.iter().cloned() { + for language_name in LANGUAGES.iter().cloned() { if let Some(filter) = LANGUAGE_FILTER.as_ref() { if !language_name.contains(filter.as_str()) { continue; @@ -57,6 +51,7 @@ fn test_real_language_corpus_files() { eprintln!("language: {:?}", language_name); + let language = get_language(language_name); let corpus_dir = grammars_dir.join(language_name).join("corpus"); let test = parse_tests(&corpus_dir).unwrap(); parser.set_language(language).unwrap(); @@ -69,12 +64,9 @@ fn test_real_language_corpus_files() { #[test] fn test_feature_corpus_files() { - fs::create_dir_all(SCRATCH_DIR.as_path()).unwrap(); - - let loader = Loader::new(SCRATCH_DIR.clone()); let mut log_session = None; let mut parser = Parser::new(); - let test_grammars_dir = FIXTURES_DIR.join("test_grammars"); + let test_grammars_dir = fixtures_dir().join("test_grammars"); if *LOG_ENABLED { parser.set_logger(Some(Box::new(|log_type, msg| { @@ -128,27 +120,7 @@ fn test_feature_corpus_files() { } else { let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap(); - let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", language_name)); - if !fs::read_to_string(&parser_c_path) - .map(|content| content == c_code) - .unwrap_or(false) - { - fs::write(&parser_c_path, c_code).unwrap(); - } - let scanner_path = test_path.join("scanner.c"); - let scanner_path = if scanner_path.exists() { - Some(scanner_path) - } else { - None - }; - let language = loader - .load_language_from_sources( - language_name, - &HEADER_DIR, - 
&parser_c_path, - &scanner_path, - ) - .unwrap(); + let language = get_test_language(language_name, c_code, &test_path); let test = parse_tests(&corpus_path).unwrap(); parser.set_language(language).unwrap(); run_mutation_tests(&mut parser, test); @@ -180,7 +152,7 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { let actual = tree.root_node().to_sexp(); assert_eq!(actual, output); } - TestEntry::Group { name, children } => { + TestEntry::Group { children, .. } => { for child in children { run_mutation_tests(parser, child); } diff --git a/cli/src/tests/fixtures.rs b/cli/src/tests/fixtures.rs new file mode 100644 index 00000000..978a1212 --- /dev/null +++ b/cli/src/tests/fixtures.rs @@ -0,0 +1,51 @@ +use crate::loader::Loader; +use std::path::{Path, PathBuf}; +use tree_sitter::Language; +use std::fs; + +lazy_static! { + static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect(); + static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); + static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); + static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); + static ref SCRATCH_DIR: PathBuf = { + let result = ROOT_DIR.join("target").join("scratch"); + fs::create_dir_all(&result).unwrap(); + result + }; + static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); +} + +pub fn fixtures_dir<'a>() -> &'static Path { + &FIXTURES_DIR +} + +pub fn get_language(name: &str) -> Language { + TEST_LOADER + .load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR) + .unwrap() +} + +pub fn get_test_language(name: &str, parser_code: String, path: &Path) -> Language { + let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name)); + if !fs::read_to_string(&parser_c_path) + .map(|content| content == parser_code) + .unwrap_or(false) + { + fs::write(&parser_c_path, parser_code).unwrap(); + } + let scanner_path = path.join("scanner.c"); + let 
scanner_path = if scanner_path.exists() { + Some(scanner_path) + } else { + None + }; + TEST_LOADER + .load_language_from_sources( + name, + &HEADER_DIR, + &parser_c_path, + &scanner_path, + ) + .unwrap() +} diff --git a/cli/src/tests/languages.rs b/cli/src/tests/languages.rs deleted file mode 100644 index e093d218..00000000 --- a/cli/src/tests/languages.rs +++ /dev/null @@ -1,21 +0,0 @@ -use tree_sitter::Language; - -extern "C" { - fn tree_sitter_bash() -> Language; - fn tree_sitter_c() -> Language; - fn tree_sitter_cpp() -> Language; - fn tree_sitter_embedded_template() -> Language; - fn tree_sitter_go() -> Language; - fn tree_sitter_html() -> Language; - fn tree_sitter_javascript() -> Language; - fn tree_sitter_rust() -> Language; -} - -pub fn bash() -> Language { unsafe { tree_sitter_bash() } } -pub fn c() -> Language { unsafe { tree_sitter_c() } } -pub fn cpp() -> Language { unsafe { tree_sitter_cpp() } } -pub fn embedded_template() -> Language { unsafe { tree_sitter_embedded_template() } } -pub fn go() -> Language { unsafe { tree_sitter_go() } } -pub fn html() -> Language { unsafe { tree_sitter_html() } } -pub fn javascript() -> Language { unsafe { tree_sitter_javascript() } } -pub fn rust() -> Language { unsafe { tree_sitter_rust() } } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index bc199616..c9f1dda4 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,3 +1,3 @@ -mod languages; +mod fixtures; mod corpuses; mod parser_api; diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api.rs index d717bfab..a399bf38 100644 --- a/cli/src/tests/parser_api.rs +++ b/cli/src/tests/parser_api.rs @@ -1,6 +1,10 @@ -use super::languages::rust; +use super::fixtures::get_language; use std::thread; -use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet}; +use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet, Language}; + +fn rust() -> Language { + get_language("rust") +} #[test] fn test_basic_parsing() { diff 
--git a/lib/build.rs b/lib/build.rs index e4d1f91a..eb6fea8b 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -2,61 +2,8 @@ extern crate cc; use std::env; use std::path::{Path, PathBuf}; -use std::fs; fn main() { - println!("cargo:rerun-if-env-changed=TREE_SITTER_TEST"); - if env::var("TREE_SITTER_TEST").is_ok() { - let mut parser_config = cc::Build::new(); - parser_config - .opt_level(0) - .flag_if_supported("-Wno-unused-parameter"); - - let mut scanner_c_config = cc::Build::new(); - scanner_c_config - .flag_if_supported("-std=c99") - .flag_if_supported("-Wno-unused-parameter"); - - let mut scanner_cxx_config = cc::Build::new(); - scanner_cxx_config - .cpp(true) - .flag_if_supported("-Wno-unused-parameter"); - - let grammars_dir: PathBuf = ["..", "test", "fixtures", "grammars"].iter().collect(); - for entry in fs::read_dir(&grammars_dir).expect("Failed to list grammar directory") { - let entry = entry.expect("Failed to load grammars directory entry"); - if !entry.path().is_dir() { - continue; - } - let parser_dir_path = entry.path(); - let parser_src_path = parser_dir_path.join("src"); - let parser_c_path = parser_src_path.join("parser.c"); - let scanner_c_path = parser_src_path.join("scanner.c"); - let scanner_cc_path = parser_src_path.join("scanner.cc"); - - println!("cargo:rerun-if-changed={}", parser_c_path.to_str().unwrap()); - parser_config - .include(&parser_src_path) - .opt_level(0) - .file(&parser_c_path); - if scanner_cc_path.exists() { - println!("cargo:rerun-if-changed={}", scanner_cc_path.to_str().unwrap()); - scanner_cxx_config - .include(&parser_src_path) - .file(&scanner_cc_path); - } else if scanner_c_path.exists() { - println!("cargo:rerun-if-changed={}", scanner_c_path.to_str().unwrap()); - scanner_c_config - .include(&parser_src_path) - .file(&scanner_c_path); - } - } - - parser_config.compile("fixture-parsers"); - scanner_c_config.compile("fixture-scanners-c"); - scanner_cxx_config.compile("fixture-scanners-cxx"); - } - 
println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS"); if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() { let clang_path = which("clang").unwrap(); diff --git a/script/regenerate-fixtures b/script/regenerate-fixtures index 15e3c09d..c47c53f9 100755 --- a/script/regenerate-fixtures +++ b/script/regenerate-fixtures @@ -2,6 +2,8 @@ set -e +cargo build --release + root_dir=$PWD tree_sitter=${root_dir}/target/release/tree-sitter grammars_dir=${root_dir}/test/fixtures/grammars @@ -19,6 +21,10 @@ grammar_names=( rust ) +if [[ "$#" > 0 ]]; then + grammar_names=($1) +fi + for grammar_name in "${grammar_names[@]}"; do echo "Regenerating ${grammar_name} parser" cd ${grammars_dir}/${grammar_name} From 0a2d72d956b6db5eecd29cbcf8f1c2293b71fbe3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 12:12:12 -0800 Subject: [PATCH 143/208] Determine language name from package.json, not directory --- cli/src/loader.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 83b878b9..26064f04 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -221,14 +221,6 @@ impl Loader { } fn find_language_at_path<'a>(&'a mut self, parser_path: &Path) -> io::Result { - let name = parser_path - .file_name() - .unwrap() - .to_str() - .unwrap() - .split_at("tree-sitter-".len()) - .1; - #[derive(Deserialize)] struct LanguageConfigurationJSON { name: String, @@ -243,6 +235,7 @@ impl Loader { #[derive(Deserialize)] struct PackageJSON { + name: String, #[serde(rename = "tree-sitter")] tree_sitter: Option>, } @@ -278,7 +271,7 @@ impl Loader { } self.language_repos.push(LanguageRepo { - name: name.to_string(), + name: package_json.name.split_at("tree-sitter-".len()).1.to_string(), path: parser_path.to_owned(), language: None, configurations, From b799b46f790ccfee6c3e77f98b9129c0d13a021e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 12:13:14 -0800 Subject: [PATCH 144/208] Handle 
repetition range operators with commas in regexes --- cli/src/generate/prepare_grammar/expand_tokens.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 1e2ef2e5..9cc527bd 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -10,7 +10,7 @@ use regex::Regex; use std::i32; lazy_static! { - static ref CURLY_BRACE_REGEX: Regex = Regex::new(r#"(^|[^\\])\{([^}]*[^0-9}][^}]*)\}"#).unwrap(); + static ref CURLY_BRACE_REGEX: Regex = Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap(); } const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; From d8ab36b2a598c0a75dfc8756e7223e6fd60375be Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 12:13:42 -0800 Subject: [PATCH 145/208] Fix bugs in handling tokens that overlap with separators --- .../generate/build_tables/build_lex_table.rs | 20 ++++++-- .../generate/build_tables/token_conflicts.rs | 22 ++------ cli/src/generate/grammars.rs | 21 +++++++- cli/src/generate/nfa.rs | 2 +- cli/src/test.rs | 51 +++++++++++-------- cli/src/tests/corpuses.rs | 38 ++++++++++---- 6 files changed, 98 insertions(+), 56 deletions(-) diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 200c6959..15f09f6b 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -191,6 +191,7 @@ impl<'a> LexTableBuilder<'a> { ); let transitions = self.cursor.transitions(); + let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep); info!("lex state: {}, transitions: {:?}", state_id, transitions); // If EOF is a valid lookahead token, add a transition predicated on the null @@ -214,12 +215,23 @@ impl<'a> LexTableBuilder<'a> { is_separator, } in transitions { - if let Some((_, completed_precedence)) = 
completion { - if precedence < completed_precedence - || (precedence == completed_precedence && is_separator) - { + if let Some((completed_id, completed_precedence)) = completion { + if precedence < completed_precedence { continue; } + + if precedence == completed_precedence { + if is_separator { + continue; + } + if has_sep && self.lexical_grammar + .variable_indices_for_nfa_states(&states) + .position(|i| i == completed_id) + .is_none() + { + continue; + } + } } let (next_state_id, _) = self.add_state(states, eof_valid && is_separator); let next_state = if next_state_id == state_id { diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 1a63bfc8..df3d4250 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -58,7 +58,7 @@ impl<'a> TokenConflictMap<'a> { pub fn does_conflict(&self, i: usize, j: usize) -> bool { let entry = &self.status_matrix[matrix_index(self.n, i, j)]; - entry.does_match_valid_continuation || entry.does_match_separators + entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string } pub fn does_overlap(&self, i: usize, j: usize) -> bool { @@ -176,7 +176,7 @@ fn compute_conflict_status( while let Some(state_set) = state_set_queue.pop() { // Don't pursue states where there's no potential for conflict. 
- if variable_ids_for_states(&state_set, grammar).count() > 1 { + if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 { cursor.reset(state_set); } else { continue; @@ -226,7 +226,7 @@ fn compute_conflict_status( if let Some((completed_id, completed_precedence)) = completion { let mut other_id = None; let mut successor_contains_completed_id = false; - for variable_id in variable_ids_for_states(&states, grammar) { + for variable_id in grammar.variable_indices_for_nfa_states(&states) { if variable_id == completed_id { successor_contains_completed_id = true; break; @@ -269,22 +269,6 @@ fn compute_conflict_status( result } -fn variable_ids_for_states<'a>( - state_ids: &'a Vec, - grammar: &'a LexicalGrammar, -) -> impl Iterator + 'a { - let mut prev = None; - state_ids.iter().filter_map(move |state_id| { - let variable_id = grammar.variable_index_for_nfa_state(*state_id); - if prev != Some(variable_id) { - prev = Some(variable_id); - prev - } else { - None - } - }) -} - #[cfg(test)] mod tests { use super::*; diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 3772bfd4..3cedcd42 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -175,8 +175,27 @@ impl Variable { } impl LexicalGrammar { + pub fn variable_indices_for_nfa_states<'a>( + &'a self, + state_ids: &'a Vec, + ) -> impl Iterator + 'a { + let mut prev = None; + state_ids.iter().filter_map(move |state_id| { + let variable_id = self.variable_index_for_nfa_state(*state_id); + if prev != Some(variable_id) { + prev = Some(variable_id); + prev + } else { + None + } + }) + } + pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize { - self.variables.iter().position(|v| v.start_state >= state_id).unwrap() + self.variables + .iter() + .position(|v| v.start_state >= state_id) + .unwrap() } } diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index 674391ff..ca2e5405 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs 
@@ -374,7 +374,7 @@ impl<'a> NfaCursor<'a> { } let intersection_transition = NfaTransition { characters: intersection, - is_separator: result[i].is_separator || is_sep, + is_separator: result[i].is_separator && is_sep, precedence: max(result[i].precedence, prec), states: intersection_states, }; diff --git a/cli/src/test.rs b/cli/src/test.rs index bcea3dcc..4d6034e5 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -75,30 +75,10 @@ pub fn run_tests_at_path( println!("{} failures:", failures.len()) } - println!( - "\n{} / {}", - Colour::Green.paint("expected"), - Colour::Red.paint("actual") - ); - + print_diff_key(); for (i, (name, actual, expected)) in failures.iter().enumerate() { println!("\n {}. {}:", i + 1, name); - let changeset = Changeset::new(actual, expected, " "); - print!(" "); - for diff in &changeset.diffs { - match diff { - Difference::Same(part) => { - print!("{}{}", part, changeset.split); - } - Difference::Add(part) => { - print!("{}{}", Colour::Green.paint(part), changeset.split); - } - Difference::Rem(part) => { - print!("{}{}", Colour::Red.paint(part), changeset.split); - } - } - } - println!(""); + print_diff(actual, expected); } } @@ -106,6 +86,33 @@ pub fn run_tests_at_path( Ok(()) } +pub fn print_diff_key() { + println!( + "\n{} / {}", + Colour::Green.paint("expected"), + Colour::Red.paint("actual") + ); +} + +pub fn print_diff(actual: &String, expected: &String) { + let changeset = Changeset::new(actual, expected, " "); + print!(" "); + for diff in &changeset.diffs { + match diff { + Difference::Same(part) => { + print!("{}{}", part, changeset.split); + } + Difference::Add(part) => { + print!("{}{}", Colour::Green.paint(part), changeset.split); + } + Difference::Rem(part) => { + print!("{}{}", Colour::Red.paint(part), changeset.split); + } + } + } + println!(""); +} + fn run_tests( parser: &mut Parser, test_entry: TestEntry, diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index e55c8f57..707158cf 100644 --- 
a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -1,6 +1,6 @@ use super::fixtures::{get_language, get_test_language, fixtures_dir}; use crate::generate; -use crate::test::{parse_tests, TestEntry}; +use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; use std::fs; use tree_sitter::{LogType, Parser}; @@ -13,6 +13,7 @@ const LANGUAGES: &'static [&'static str] = &[ "go", "html", "javascript", + "python", ]; lazy_static! { @@ -42,9 +43,10 @@ fn test_real_language_corpus_files() { log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); } + let mut did_fail = false; for language_name in LANGUAGES.iter().cloned() { if let Some(filter) = LANGUAGE_FILTER.as_ref() { - if !language_name.contains(filter.as_str()) { + if language_name != filter.as_str() { continue; } } @@ -55,11 +57,15 @@ fn test_real_language_corpus_files() { let corpus_dir = grammars_dir.join(language_name).join("corpus"); let test = parse_tests(&corpus_dir).unwrap(); parser.set_language(language).unwrap(); - run_mutation_tests(&mut parser, test); + did_fail |= run_mutation_tests(&mut parser, test); } drop(parser); drop(log_session); + + if did_fail { + panic!("Corpus tests failed"); + } } #[test] @@ -80,6 +86,7 @@ fn test_feature_corpus_files() { log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); } + let mut did_fail = false; for entry in fs::read_dir(&test_grammars_dir).unwrap() { let entry = entry.unwrap(); if !entry.metadata().unwrap().is_dir() { @@ -89,7 +96,7 @@ fn test_feature_corpus_files() { let language_name = language_name.to_str().unwrap(); if let Some(filter) = LANGUAGE_FILTER.as_ref() { - if !language_name.contains(filter.as_str()) { + if language_name != filter.as_str() { continue; } } @@ -123,15 +130,19 @@ fn test_feature_corpus_files() { let language = get_test_language(language_name, c_code, &test_path); let test = parse_tests(&corpus_path).unwrap(); parser.set_language(language).unwrap(); - 
run_mutation_tests(&mut parser, test); + did_fail |= run_mutation_tests(&mut parser, test); } } drop(parser); drop(log_session); + + if did_fail { + panic!("Corpus tests failed"); + } } -fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { +fn run_mutation_tests(parser: &mut Parser, test: TestEntry) -> bool { match test { TestEntry::Example { name, @@ -140,7 +151,7 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { } => { if let Some(filter) = EXAMPLE_FILTER.as_ref() { if !name.contains(filter.as_str()) { - return; + return false; } } @@ -150,12 +161,21 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) { .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) .unwrap(); let actual = tree.root_node().to_sexp(); - assert_eq!(actual, output); + if actual != output { + print_diff_key(); + print_diff(&actual, &output); + println!(""); + true + } else { + false + } } TestEntry::Group { children, .. } => { + let mut result = false; for child in children { - run_mutation_tests(parser, child); + result |= run_mutation_tests(parser, child); } + result } } } From 522021b107c00bbd146dbc0f813d16e3bce8e550 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 15:57:29 -0800 Subject: [PATCH 146/208] Fix NFA generation w/ nested groups --- .../generate/prepare_grammar/expand_tokens.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 9cc527bd..6b92713e 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -263,7 +263,7 @@ impl NfaBuilder { Ok(result) } }, - Ast::Group(group) => self.expand_regex(&group.ast, self.nfa.last_state_id()), + Ast::Group(group) => self.expand_regex(&group.ast, next_state_id), Ast::Alternation(alternation) => { let mut alternative_state_ids = Vec::new(); for ast in alternation.asts.iter() { 
@@ -619,7 +619,18 @@ mod tests { ("12e34", Some((0, "12e34"))), ], }, - // Allowing unrecognized escape sequences + // nested groups + Row { + rules: vec![Rule::seq(vec![ + Rule::pattern(r#"([^x\\]|\\(.|\n))+"#), + ])], + separators: vec![], + examples: vec![ + ("abcx", Some((0, "abc"))), + ("abc\\0x", Some((0, "abc\\0"))), + ], + }, + // allowing unrecognized escape sequences Row { rules: vec![ // Escaped forward slash (used in JS because '/' is the regex delimiter) @@ -636,7 +647,7 @@ mod tests { (r#"'\'a"#, Some((2, r#"'\'"#))), ], }, - // Allowing un-escaped curly braces + // allowing un-escaped curly braces Row { rules: vec![ // Un-escaped curly braces @@ -649,7 +660,6 @@ mod tests { ("u{1234} ok", Some((0, "u{1234}"))), ("{aba}}", Some((1, "{aba}"))), ], - } ]; From ceff3936ef6e9231e2ea78e1edaaac8370f542f0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 16:10:52 -0800 Subject: [PATCH 147/208] Unify logic for handling tokens that match separators into one place --- .../generate/build_tables/build_lex_table.rs | 39 +++------ .../generate/build_tables/token_conflicts.rs | 85 +++++++++++++------ 2 files changed, 71 insertions(+), 53 deletions(-) diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 15f09f6b..03ec0c7b 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex; use super::item::TokenSet; use super::token_conflicts::TokenConflictMap; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; +use crate::generate::nfa::{CharacterSet, NfaCursor}; use crate::generate::rules::Symbol; use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; use std::collections::hash_map::Entry; @@ -208,42 +208,31 @@ impl<'a> LexTableBuilder<'a> { )); } - 
for NfaTransition { - characters, - precedence, - states, - is_separator, - } in transitions - { + for transition in transitions { if let Some((completed_id, completed_precedence)) = completion { - if precedence < completed_precedence { + if !TokenConflictMap::prefer_transition( + &self.lexical_grammar, + &transition, + completed_id, + completed_precedence, + has_sep, + ) { continue; } - - if precedence == completed_precedence { - if is_separator { - continue; - } - if has_sep && self.lexical_grammar - .variable_indices_for_nfa_states(&states) - .position(|i| i == completed_id) - .is_none() - { - continue; - } - } } - let (next_state_id, _) = self.add_state(states, eof_valid && is_separator); + + let (next_state_id, _) = + self.add_state(transition.states, eof_valid && transition.is_separator); let next_state = if next_state_id == state_id { None } else { Some(next_state_id) }; self.table.states[state_id].advance_actions.push(( - characters, + transition.characters, AdvanceAction { state: next_state, - in_main_token: !is_separator, + in_main_token: !transition.is_separator, }, )); } diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index df3d4250..13c69c19 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -58,7 +58,9 @@ impl<'a> TokenConflictMap<'a> { pub fn does_conflict(&self, i: usize, j: usize) -> bool { let entry = &self.status_matrix[matrix_index(self.n, i, j)]; - entry.does_match_valid_continuation || entry.does_match_separators || entry.matches_same_string + entry.does_match_valid_continuation + || entry.does_match_separators + || entry.matches_same_string } pub fn does_overlap(&self, i: usize, j: usize) -> bool { @@ -81,6 +83,32 @@ impl<'a> TokenConflictMap<'a> { Ordering::Equal => left.1 < right.1, } } + + pub fn prefer_transition( + grammar: &LexicalGrammar, + t: &NfaTransition, + completed_id: usize, + 
completed_precedence: i32, + has_separator_transitions: bool, + ) -> bool { + if t.precedence < completed_precedence { + return false; + } + if t.precedence == completed_precedence { + if t.is_separator { + return false; + } + if has_separator_transitions + && grammar + .variable_indices_for_nfa_states(&t.states) + .position(|i| i == completed_id) + .is_none() + { + return false; + } + } + true + } } impl<'a> fmt::Debug for TokenConflictMap<'a> { @@ -97,7 +125,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { for i in 0..self.n { write!( f, - " {}: {:?},\n", + " {:?}: {:?},\n", self.grammar.variables[i].name, self.following_chars_by_index[i] )?; } @@ -105,11 +133,11 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { write!(f, " status_matrix: {{\n")?; for i in 0..self.n { - write!(f, " {}: {{\n", self.grammar.variables[i].name)?; + write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?; for j in 0..self.n { write!( f, - " {}: {:?},\n", + " {:?}: {:?},\n", self.grammar.variables[j].name, self.status_matrix[matrix_index(self.n, i, j)] )?; @@ -191,19 +219,19 @@ fn compute_conflict_status( // Prefer tokens with higher precedence. For tokens with equal precedence, // prefer those listed earlier in the grammar. 
- let winning_id; + let preferred_id; if TokenConflictMap::prefer_token( grammar, (prev_precedence, prev_id), (precedence, id), ) { - winning_id = prev_id; + preferred_id = prev_id; } else { - winning_id = id; + preferred_id = id; completion = Some((id, precedence)); } - if winning_id == i { + if preferred_id == i { result.0.matches_same_string = true; result.0.does_overlap = true; } else { @@ -215,18 +243,14 @@ fn compute_conflict_status( } } - for NfaTransition { - characters, - precedence, - states, - is_separator, - } in cursor.transitions() - { + let has_sep = cursor.transition_chars().any(|(_, sep)| sep); + + for transition in cursor.transitions() { let mut can_advance = true; if let Some((completed_id, completed_precedence)) = completion { let mut other_id = None; let mut successor_contains_completed_id = false; - for variable_id in grammar.variable_indices_for_nfa_states(&states) { + for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) { if variable_id == completed_id { successor_contains_completed_id = true; break; @@ -236,33 +260,38 @@ fn compute_conflict_status( } if let (Some(other_id), false) = (other_id, successor_contains_completed_id) { - let winning_id; - if precedence < completed_precedence { - winning_id = completed_id; - can_advance = false; + let preferred_id = if TokenConflictMap::prefer_transition( + grammar, + &transition, + completed_id, + completed_precedence, + has_sep, + ) { + can_advance = true; + other_id } else { - winning_id = other_id; - } + completed_id + }; - if winning_id == i { + if preferred_id == i { result.0.does_overlap = true; - if characters.does_intersect(&following_chars[j]) { + if transition.characters.does_intersect(&following_chars[j]) { result.0.does_match_valid_continuation = true; } - if is_separator { + if transition.is_separator || has_sep { result.0.does_match_separators = true; } } else { result.1.does_overlap = true; - if characters.does_intersect(&following_chars[i]) { + if 
transition.characters.does_intersect(&following_chars[i]) { result.1.does_match_valid_continuation = true; } } } } - if can_advance && visited_state_sets.insert(states.clone()) { - state_set_queue.push(states); + if can_advance && visited_state_sets.insert(transition.states.clone()) { + state_set_queue.push(transition.states); } } } From 0ee11584a7ea4bb39ed0066899ad1ceb5eb36cb8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 16:12:30 -0800 Subject: [PATCH 148/208] Add -xc compiler flag for pure-C external scanners --- cli/src/loader.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 26064f04..afb18f9e 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -188,7 +188,7 @@ impl Loader { .arg(parser_path); if let Some(scanner_path) = scanner_path.as_ref() { if scanner_path.extension() == Some("c".as_ref()) { - command.arg(scanner_path); + command.arg("-xc").arg("-std=c99").arg(scanner_path); } else { command.arg("-xc++").arg(scanner_path); } @@ -271,7 +271,11 @@ impl Loader { } self.language_repos.push(LanguageRepo { - name: package_json.name.split_at("tree-sitter-".len()).1.to_string(), + name: package_json + .name + .split_at("tree-sitter-".len()) + .1 + .to_string(), path: parser_path.to_owned(), language: None, configurations, From d23a03bdf118737c80fc19027000ee16df48bbb6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 16:37:54 -0800 Subject: [PATCH 149/208] Represent ParseItemSet as a sorted Vec, not a BTreeMap --- .../build_tables/build_parse_table.rs | 14 ++++-------- cli/src/generate/build_tables/item.rs | 22 ++++++++++++++----- .../generate/build_tables/item_set_builder.rs | 8 ++----- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index b87cc3d0..792a8759 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ 
b/cli/src/generate/build_tables/build_parse_table.rs @@ -174,18 +174,12 @@ impl<'a> ParseTableBuilder<'a> { non_terminal_successors .entry(next_symbol) .or_insert_with(|| ParseItemSet::default()) - .entries - .entry(successor) - .or_insert_with(|| TokenSet::new()) - .insert_all(lookaheads); + .insert(successor, lookaheads); } else { terminal_successors .entry(next_symbol) .or_insert_with(|| ParseItemSet::default()) - .entries - .entry(successor) - .or_insert_with(|| TokenSet::new()) - .insert_all(lookaheads); + .insert(successor, lookaheads); } } else { let action = if item.is_augmented() { @@ -620,8 +614,8 @@ impl<'a> ParseTableBuilder<'a> { ) -> AuxiliarySymbolInfo { let parent_symbols = item_set .entries - .keys() - .filter_map(|item| { + .iter() + .filter_map(|(item, _)| { let variable_index = item.variable_index as usize; if item.symbol() == Some(symbol) && !self.syntax_grammar.variables[variable_index].is_auxiliary() diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 279c5df6..0222ac21 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -3,7 +3,6 @@ use crate::generate::rules::Associativity; use crate::generate::rules::{Symbol, SymbolType}; use smallbitvec::SmallBitVec; use std::cmp::Ordering; -use std::collections::BTreeMap; use std::fmt; use std::hash::{Hash, Hasher}; use std::iter::FromIterator; @@ -40,7 +39,7 @@ pub(crate) struct ParseItem<'a> { #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseItemSet<'a> { - pub entries: BTreeMap, TokenSet>, + pub entries: Vec<(ParseItem<'a>, TokenSet)>, } pub(crate) struct ParseItemDisplay<'a>( @@ -227,15 +226,28 @@ impl<'a> ParseItemSet<'a> { pub fn with(elements: impl IntoIterator, TokenSet)>) -> Self { let mut result = Self::default(); for (item, lookaheads) in elements { - result.entries.insert(item, lookaheads); + result.insert(item, &lookaheads); } result } + pub fn insert(&mut self, item: ParseItem<'a>, 
lookaheads: &TokenSet) -> &mut TokenSet { + match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) { + Err(i) => { + self.entries.insert(i, (item, lookaheads.clone())); + &mut self.entries[i].1 + }, + Ok(i) => { + self.entries[i].1.insert_all(lookaheads); + &mut self.entries[i].1 + } + } + } + pub fn hash_unfinished_items(&self, h: &mut impl Hasher) { let mut previous_variable_index = u32::MAX; let mut previous_step_index = u32::MAX; - for item in self.entries.keys() { + for (item, _) in self.entries.iter() { if item.step().is_none() && item.variable_index != previous_variable_index || item.step_index != previous_step_index { @@ -251,7 +263,7 @@ impl<'a> ParseItemSet<'a> { impl<'a> Default for ParseItemSet<'a> { fn default() -> Self { Self { - entries: BTreeMap::new(), + entries: Vec::new(), } } } diff --git a/cli/src/generate/build_tables/item_set_builder.rs b/cli/src/generate/build_tables/item_set_builder.rs index 56d7c7c4..b941b179 100644 --- a/cli/src/generate/build_tables/item_set_builder.rs +++ b/cli/src/generate/build_tables/item_set_builder.rs @@ -285,18 +285,14 @@ impl<'a> ParseItemSetBuilder<'a> { // Use the pre-computed *additions* to expand the non-terminal. 
for addition in &self.transitive_closure_additions[step.symbol.index] { - let lookaheads = set - .entries - .entry(addition.item) - .or_insert_with(|| TokenSet::new()); - lookaheads.insert_all(&addition.info.lookaheads); + let lookaheads = set.insert(addition.item, &addition.info.lookaheads); if addition.info.propagates_lookaheads { lookaheads.insert_all(following_tokens); } } } } - set.entries.insert(item, lookaheads.clone()); + set.insert(item, lookaheads); } } From ef735eb94228423e367f36900225c57c6eb8d9b9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 19:18:33 -0800 Subject: [PATCH 150/208] Upload binary artifacts from CI builds --- .appveyor.yml | 21 +++++++++++++++++++++ .travis.yml | 21 +++++++++++++++++++++ cli/src/util.rs | 3 ++- lib/build.rs | 14 +++++++++++--- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 29193a53..0c9de3ac 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -10,6 +10,10 @@ install: # Install dependencies - git submodule update --init +platform: + - x64 + - x86 + test_script: # Fetch and regenerate the fixture parsers - script\fetch-fixtures.cmd @@ -23,6 +27,23 @@ test_script: branches: only: - master + - /\d+\.\d+\.\d+.*/ + +before_deploy: + - move target\release\tree-sitter.exe tree-sitter.exe + - 7z a tree-sitter-windows-%PLATFORM%.zip tree-sitter.exe + - appveyor PushArtifact tree-sitter-windows-%PLATFORM%.zip + +deploy: + description: '' + provider: GitHub + auth_token: + secure: VC9ntV5+inKoNteZyLQksKzWMKXF46P+Jx3JHKVSfF+o1rWtZn2iIHAVsQv5LaUi + artifact: /tree-sitter-windows-.*.zip/ + draft: true + force_update: true + on: + APPVEYOR_REPO_TAG: true cache: - target diff --git a/.travis.yml b/.travis.yml index 5f981ce9..55fc9276 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,10 @@ language: rust rust: - stable +os: + - linux + - osx + script: # Fetch and regenerate the fixture parsers - script/fetch-fixtures @@ -15,6 +19,23 @@ script: branches: only: - 
master + - /\d+\.\d+\.\d+/ + +before_deploy: + - mv target/release/tree-sitter . + - tar czf tree-sitter-${TRAVIS_OS_NAME}-x64.tar.gz tree-sitter + +deploy: + provider: releases + api_key: + secure: "cAd2mQP+Q55v3zedo5ZyOVc3hq3XKMW93lp5LuXV6CYKYbIhkyfym4qfs+C9GJQiIP27cnePYM7B3+OMIFwSPIgXHWWSsuloMtDgYSc/PAwb2dZnJqAyog3BohW/QiGTSnvbVlxPF6P9RMQU6+JP0HJzEJy6QBTa4Und/j0jm24=" + file_glob: true + file: "tree-sitter-*.tar.gz" + draft: true + overwrite: true + skip_cleanup: true + on: + tags: true cache: cargo: true diff --git a/cli/src/util.rs b/cli/src/util.rs index 5c1bc39c..166e54d0 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use std::process::{Child, ChildStdin, Command, Stdio}; use tree_sitter::Parser; +#[cfg(unix)] const HTML_HEADER: &[u8] = b"\n\n\n"; #[cfg(windows)] @@ -13,7 +14,7 @@ pub(crate) struct LogSession(); pub(crate) struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub(crate) fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { Ok(LogSession()) } diff --git a/lib/build.rs b/lib/build.rs index eb6fea8b..2a121001 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -6,9 +6,17 @@ use std::path::{Path, PathBuf}; fn main() { println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS"); if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() { - let clang_path = which("clang").unwrap(); - let clang_path = clang_path.to_str().unwrap(); - env::set_var("CC", &format!("scan-build -analyze-headers --use-analyzer={} cc", clang_path)); + if let (Some(clang_path), Some(scan_build_path)) = (which("clang"), which("scan-build")) { + let clang_path = clang_path.to_str().unwrap(); + let scan_build_path = scan_build_path.to_str().unwrap(); + env::set_var( + "CC", + &format!( + "{} -analyze-headers --use-analyzer={} cc", + scan_build_path, clang_path + ), + ); + } } let mut config = cc::Build::new(); From 
b0fe8164414b900b9fdea72824071c62894857f0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Jan 2019 11:42:50 -0800 Subject: [PATCH 151/208] Add npm module --- cli/npm/.gitignore | 4 +++ cli/npm/install.js | 71 +++++++++++++++++++++++++++++++++++++++ cli/npm/package-lock.json | 5 +++ cli/npm/package.json | 22 ++++++++++++ 4 files changed, 102 insertions(+) create mode 100644 cli/npm/.gitignore create mode 100755 cli/npm/install.js create mode 100644 cli/npm/package-lock.json create mode 100644 cli/npm/package.json diff --git a/cli/npm/.gitignore b/cli/npm/.gitignore new file mode 100644 index 00000000..306613e7 --- /dev/null +++ b/cli/npm/.gitignore @@ -0,0 +1,4 @@ +tree-sitter +tree-sitter.exe +*.tar.gz +*.zip diff --git a/cli/npm/install.js b/cli/npm/install.js new file mode 100755 index 00000000..5564ce98 --- /dev/null +++ b/cli/npm/install.js @@ -0,0 +1,71 @@ +#!/usr/bin/env node + +const fs = require('fs'); +const https = require('https'); +const execFileSync = require('child_process').execFileSync; +const packageJSON = require('./package.json'); + +// Determine the URL of the file. +const isWindows = process.platform === 'win32'; +const platformName = { + 'darwin': 'osx', + 'linux': 'linux', + 'win32': 'windows' +}[process.platform]; +if (!platformName) { + throw new Error(`Cannot install tree-sitter-cli for platform ${process.platform}`); +} +const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/${packageJSON.version}`; +const assetExtension = isWindows ? 'zip' : 'tar.gz'; +const assetName = `tree-sitter-${platformName}-${process.arch}.${assetExtension}`; +const assetURL = `${releaseURL}/${assetName}`; + +// Remove previously-downloaded files. +const executableName = isWindows ? 'tree-sitter.exe' : 'tree-sitter'; +if (fs.existsSync(executableName)) { + fs.unlinkSync(executableName); +} +if (fs.existsSync(assetName)) { + fs.unlinkSync(assetName); +} + +// Download the compressed file. 
+console.log(`Downloading ${assetURL}`); +const file = fs.createWriteStream(assetName); +get(assetURL, response => { + if (response.statusCode > 299) { + throw new Error([ + 'Download failed', + '', + `url: ${url}`, + `status: ${response.statusCode}`, + `headers: ${JSON.stringify(response.headers, null, 2)}`, + '', + ].join('\n')); + } + + response.pipe(file); +}); + +// Extract the file. +file.on('finish', () => { + console.log(`Extracting ${assetName}`); + if (isWindows) { + execFileSync('7z', ['e', assetName]); + } else { + execFileSync('tar', ['xzf', assetName]); + } + fs.unlinkSync(assetName); + console.log(`Done`); +}); + +// Follow redirects. +function get(url, callback) { + https.get(url, response => { + if (response.statusCode === 301 || response.statusCode === 302) { + get(response.headers.location, callback); + } else { + callback(response); + } + }); +} diff --git a/cli/npm/package-lock.json b/cli/npm/package-lock.json new file mode 100644 index 00000000..b78f1d4e --- /dev/null +++ b/cli/npm/package-lock.json @@ -0,0 +1,5 @@ +{ + "name": "tree-sitter-cli", + "version": "0.14.0-beta0", + "lockfileVersion": 1 +} diff --git a/cli/npm/package.json b/cli/npm/package.json new file mode 100644 index 00000000..01a50491 --- /dev/null +++ b/cli/npm/package.json @@ -0,0 +1,22 @@ +{ + "name": "tree-sitter-cli", + "version": "0.14.0-beta0", + "author": "Max Brunsfeld", + "license": "MIT", + "repository": { + "type": "git", + "url": "http://github.com/tree-sitter/tree-sitter.git" + }, + "description": "CLI for generating fast incremental parsers", + "keywords": [ + "parser", + "lexer" + ], + "main": "lib/api/index.js", + "scripts": { + "install": "./install.js" + }, + "bin": { + "tree-sitter": "tree-sitter" + } +} From e7bb57550badeab50f3a44a607dab80f7a91069c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 Jan 2019 19:18:33 -0800 Subject: [PATCH 152/208] Use gzip for release assets so they can easily be extracted from node --- .appveyor.yml | 6 +++--- 
.gitignore | 12 +++++++++--- .travis.yml | 6 +++--- cli/npm/.gitignore | 3 +-- cli/npm/install.js | 40 ++++++++++++++++------------------------ cli/npm/package.json | 2 +- 6 files changed, 33 insertions(+), 36 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 0c9de3ac..07a72738 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -31,15 +31,15 @@ branches: before_deploy: - move target\release\tree-sitter.exe tree-sitter.exe - - 7z a tree-sitter-windows-%PLATFORM%.zip tree-sitter.exe - - appveyor PushArtifact tree-sitter-windows-%PLATFORM%.zip + - 7z a -tgzip tree-sitter-windows-%PLATFORM%.gz tree-sitter.exe + - appveyor PushArtifact tree-sitter-windows-%PLATFORM%.gz deploy: description: '' provider: GitHub auth_token: secure: VC9ntV5+inKoNteZyLQksKzWMKXF46P+Jx3JHKVSfF+o1rWtZn2iIHAVsQv5LaUi - artifact: /tree-sitter-windows-.*.zip/ + artifact: /tree-sitter-windows-.*/ draft: true force_update: true on: diff --git a/.gitignore b/.gitignore index 23c82fe6..bcb55844 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,17 @@ log.html + .idea *.xcodeproj -*.a -*.o + fuzz-results + test/fixtures/grammars/* !test/fixtures/grammars/.gitkeep /target -**/*.rs.bk +*.rs.bk +*.a +*.o +*.obj +*.exp +*.lib diff --git a/.travis.yml b/.travis.yml index 55fc9276..722a4dc9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,15 +22,15 @@ branches: - /\d+\.\d+\.\d+/ before_deploy: - - mv target/release/tree-sitter . - - tar czf tree-sitter-${TRAVIS_OS_NAME}-x64.tar.gz tree-sitter + - cp target/release/tree-sitter . 
+ - gzip --suffix "-${TRAVIS_OS_NAME}-x64.gz" tree-sitter deploy: provider: releases api_key: secure: "cAd2mQP+Q55v3zedo5ZyOVc3hq3XKMW93lp5LuXV6CYKYbIhkyfym4qfs+C9GJQiIP27cnePYM7B3+OMIFwSPIgXHWWSsuloMtDgYSc/PAwb2dZnJqAyog3BohW/QiGTSnvbVlxPF6P9RMQU6+JP0HJzEJy6QBTa4Und/j0jm24=" file_glob: true - file: "tree-sitter-*.tar.gz" + file: "tree-sitter-*.gz" draft: true overwrite: true skip_cleanup: true diff --git a/cli/npm/.gitignore b/cli/npm/.gitignore index 306613e7..f0475945 100644 --- a/cli/npm/.gitignore +++ b/cli/npm/.gitignore @@ -1,4 +1,3 @@ tree-sitter tree-sitter.exe -*.tar.gz -*.zip +*.gz diff --git a/cli/npm/install.js b/cli/npm/install.js index 5564ce98..d73c51cb 100755 --- a/cli/npm/install.js +++ b/cli/npm/install.js @@ -1,12 +1,11 @@ #!/usr/bin/env node const fs = require('fs'); +const zlib = require('zlib'); const https = require('https'); -const execFileSync = require('child_process').execFileSync; const packageJSON = require('./package.json'); // Determine the URL of the file. -const isWindows = process.platform === 'win32'; const platformName = { 'darwin': 'osx', 'linux': 'linux', @@ -15,48 +14,41 @@ const platformName = { if (!platformName) { throw new Error(`Cannot install tree-sitter-cli for platform ${process.platform}`); } + +const archName = { + 'x64': 'x64', + 'x86': 'x86', + 'ia32': 'x86' +}[process.arch]; +if (!archName) { + throw new Error(`Cannot install tree-sitter-cli for architecture ${process.arch}`); +} + const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/${packageJSON.version}`; -const assetExtension = isWindows ? 'zip' : 'tar.gz'; -const assetName = `tree-sitter-${platformName}-${process.arch}.${assetExtension}`; +const assetName = `tree-sitter-${platformName}-${archName}.gz`; const assetURL = `${releaseURL}/${assetName}`; // Remove previously-downloaded files. -const executableName = isWindows ? 'tree-sitter.exe' : 'tree-sitter'; +const executableName = process.platform === 'win32' ? 
'tree-sitter.exe' : 'tree-sitter'; if (fs.existsSync(executableName)) { fs.unlinkSync(executableName); } -if (fs.existsSync(assetName)) { - fs.unlinkSync(assetName); -} // Download the compressed file. console.log(`Downloading ${assetURL}`); -const file = fs.createWriteStream(assetName); +const file = fs.createWriteStream(executableName); get(assetURL, response => { if (response.statusCode > 299) { throw new Error([ 'Download failed', '', - `url: ${url}`, + `url: ${assetURL}`, `status: ${response.statusCode}`, `headers: ${JSON.stringify(response.headers, null, 2)}`, '', ].join('\n')); } - - response.pipe(file); -}); - -// Extract the file. -file.on('finish', () => { - console.log(`Extracting ${assetName}`); - if (isWindows) { - execFileSync('7z', ['e', assetName]); - } else { - execFileSync('tar', ['xzf', assetName]); - } - fs.unlinkSync(assetName); - console.log(`Done`); + response.pipe(zlib.createGunzip()).pipe(file); }); // Follow redirects. diff --git a/cli/npm/package.json b/cli/npm/package.json index 01a50491..e459b551 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -14,7 +14,7 @@ ], "main": "lib/api/index.js", "scripts": { - "install": "./install.js" + "install": "install.js" }, "bin": { "tree-sitter": "tree-sitter" From 564c5e39b66a6d9406852e823262872c5ace6cec Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Jan 2019 12:42:02 -0800 Subject: [PATCH 153/208] 0.14.0-beta1 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 464cd050..09bc9ea0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -771,7 +771,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.1.0" +version = "0.14.0-beta1" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 
b6226917..e1c83583 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tree-sitter-cli" -version = "0.1.0" +version = "0.14.0-beta1" authors = ["Max Brunsfeld "] edition = "2018" diff --git a/cli/npm/package.json b/cli/npm/package.json index e459b551..de64c70f 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta0", + "version": "0.14.0-beta1", "author": "Max Brunsfeld", "license": "MIT", "repository": { From a0a3903f767b9d421aa50bfde65eca13d40924b9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Jan 2019 13:53:01 -0800 Subject: [PATCH 154/208] Generate binding.gyp, binding.cc, and index.js --- cli/src/generate/mod.rs | 29 +++++++++++++++++++------- cli/src/generate/npm_files.rs | 18 ++++++++++++++++ cli/src/generate/templates/binding.cc | 28 +++++++++++++++++++++++++ cli/src/generate/templates/binding.gyp | 18 ++++++++++++++++ cli/src/generate/templates/index.js | 9 ++++++++ cli/src/tests/corpuses.rs | 2 +- 6 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 cli/src/generate/npm_files.rs create mode 100644 cli/src/generate/templates/binding.cc create mode 100644 cli/src/generate/templates/binding.gyp create mode 100644 cli/src/generate/templates/index.js diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 1593c0da..f42dff96 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -12,6 +12,7 @@ use std::process::{Command, Stdio}; mod build_tables; mod grammars; mod nfa; +mod npm_files; mod parse_grammar; mod prepare_grammar; mod properties; @@ -36,17 +37,30 @@ pub fn generate_parser_in_directory( if !properties_only { let grammar_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); let grammar_json = load_grammar_file(&grammar_path); - let c_code = + let (language_name, c_code) = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; - 
fs::create_dir_all("src")?; - fs::write(repo_path.join("src").join("parser.c"), c_code)?; + let repo_src_path = repo_path.join("src"); + fs::create_dir_all(&repo_src_path)?; + fs::write(&repo_src_path.join("parser.c"), c_code)?; + let binding_cc_path = repo_src_path.join("binding.cc"); + if !binding_cc_path.exists() { + fs::write(&binding_cc_path, npm_files::binding_cc(&language_name))?; + } + let binding_gyp_path = repo_path.join("binding.gyp"); + if !binding_gyp_path.exists() { + fs::write(&binding_gyp_path, npm_files::binding_gyp(&language_name))?; + } + let index_js_path = repo_path.join("index.js"); + if !index_js_path.exists() { + fs::write(&index_js_path, npm_files::index_js(&language_name))?; + } } properties::generate_property_sheets(repo_path)?; Ok(()) } #[cfg(test)] -pub fn generate_parser_for_grammar(grammar_json: &String) -> Result { +pub fn generate_parser_for_grammar(grammar_json: &String) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) } @@ -55,7 +69,7 @@ fn generate_parser_for_grammar_with_opts( grammar_json: &str, minimize: bool, state_ids_to_log: Vec, -) -> Result { +) -> Result<(String, String)> { let input_grammar = parse_grammar(grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; @@ -67,7 +81,7 @@ fn generate_parser_for_grammar_with_opts( minimize, state_ids_to_log, )?; - Ok(render_c_code( + let c_code = render_c_code( &input_grammar.name, parse_table, main_lex_table, @@ -76,7 +90,8 @@ fn generate_parser_for_grammar_with_opts( syntax_grammar, lexical_grammar, simple_aliases, - )) + ); + Ok((input_grammar.name, c_code)) } fn load_grammar_file(grammar_path: &PathBuf) -> String { diff --git a/cli/src/generate/npm_files.rs b/cli/src/generate/npm_files.rs new file mode 100644 index 00000000..5f813c88 --- /dev/null +++ b/cli/src/generate/npm_files.rs @@ -0,0 +1,18 
@@ +use std::str; + +const BINDING_CC_TEMPLATE: &'static str = include_str!("./templates/binding.cc"); +const BINDING_GYP_TEMPLATE: &'static str = include_str!("./templates/binding.gyp"); +const INDEX_JS_TEMPLATE: &'static str = include_str!("./templates/index.js"); +const PARSER_NAME_PLACEHOLDER: &'static str = "PARSER_NAME"; + +pub fn binding_cc(parser_name: &str) -> String { + BINDING_CC_TEMPLATE.replace(PARSER_NAME_PLACEHOLDER, parser_name) +} + +pub fn binding_gyp(parser_name: &str) -> String { + BINDING_GYP_TEMPLATE.replace(PARSER_NAME_PLACEHOLDER, parser_name) +} + +pub fn index_js(parser_name: &str) -> String { + INDEX_JS_TEMPLATE.replace(PARSER_NAME_PLACEHOLDER, parser_name) +} diff --git a/cli/src/generate/templates/binding.cc b/cli/src/generate/templates/binding.cc new file mode 100644 index 00000000..18853f55 --- /dev/null +++ b/cli/src/generate/templates/binding.cc @@ -0,0 +1,28 @@ +#include "tree_sitter/parser.h" +#include +#include "nan.h" + +using namespace v8; + +extern "C" TSLanguage * tree_sitter_PARSER_NAME(); + +namespace { + +NAN_METHOD(New) {} + +void Init(Handle exports, Handle module) { + Local tpl = Nan::New(New); + tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + tpl->InstanceTemplate()->SetInternalFieldCount(1); + + Local constructor = tpl->GetFunction(); + Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(instance, 0, tree_sitter_PARSER_NAME()); + + instance->Set(Nan::New("name").ToLocalChecked(), Nan::New("PARSER_NAME").ToLocalChecked()); + module->Set(Nan::New("exports").ToLocalChecked(), instance); +} + +NODE_MODULE(tree_sitter_PARSER_NAME_binding, Init) + +} // namespace diff --git a/cli/src/generate/templates/binding.gyp b/cli/src/generate/templates/binding.gyp new file mode 100644 index 00000000..f273a007 --- /dev/null +++ b/cli/src/generate/templates/binding.gyp @@ -0,0 +1,18 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_PARSER_NAME_binding", + 
"include_dirs": [ + " Date: Wed, 16 Jan 2019 13:53:20 -0800 Subject: [PATCH 155/208] Remove unused dependencies --- .appveyor.yml | 2 - Cargo.lock | 270 ------------------------------------------------- cli/Cargo.toml | 2 - 3 files changed, 274 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 07a72738..26ae0691 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -49,5 +49,3 @@ cache: - target - test\fixtures\grammars - C:\Users\appveyor\.cargo - - C:\cargo\registry - - C:\cargo\git diff --git a/Cargo.lock b/Cargo.lock index 09bc9ea0..2edecfc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,44 +123,6 @@ name = "constant_time_eq" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "crossbeam-channel" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "arrayvec 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam-utils" -version = "0.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "crossbeam-utils" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "difference" version = "2.0.0" @@ -196,11 +158,6 @@ dependencies = [ "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "fnv" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "fuchsia-zircon" version = "0.3.3" @@ -215,18 +172,6 @@ name = "fuchsia-zircon-sys" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "globset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", - "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "hashbrown" version = "0.1.7" @@ -236,23 +181,6 @@ dependencies = [ "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "ignore" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", - "globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "indexmap" version = "1.0.2" @@ -282,29 +210,6 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "libsqlite3-sys" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", - "vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "linked-hash-map" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "lock_api" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "log" version = "0.4.6" @@ -313,14 +218,6 @@ dependencies = [ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "lru-cache" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "memchr" version = "2.1.1" @@ -331,11 +228,6 @@ dependencies = [ "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "memoffset" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "nodrop" version = "0.1.13" @@ -371,40 +263,6 @@ name = 
"num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "owning_ref" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "parking_lot" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "parking_lot_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "pkg-config" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "proc-macro2" version = "0.4.24" @@ -431,18 +289,6 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rand" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "rand" version = "0.6.4" 
@@ -469,14 +315,6 @@ dependencies = [ "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rand_core" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "rand_core" version = "0.3.0" @@ -592,17 +430,6 @@ dependencies = [ "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rusqlite" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)", - "lru-cache 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "rustc-demangle" version = "0.1.9" @@ -621,14 +448,6 @@ name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "same-file" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "scoped_threadpool" version = "0.1.9" @@ -683,19 +502,6 @@ name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "smallvec" -version = "0.6.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "stable_deref_trait" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "strsim" version = "0.7.0" @@ -748,16 +554,6 @@ dependencies = [ "lazy_static 1.2.0 
(registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "time" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "tree-sitter" version = "0.3.5" @@ -779,14 +575,12 @@ dependencies = [ "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", @@ -809,24 +603,11 @@ name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "unreachable" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "utf8-ranges" version = "1.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "vcpkg" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "vec_map" version = "0.8.1" @@ -837,21 +618,6 @@ name = "version_check" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "walkdir" -version = "2.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "winapi" version = "0.3.6" @@ -866,14 +632,6 @@ name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "winapi-util" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -896,48 +654,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" -"checksum crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"7b85741761b7f160bc5e7e0c14986ef685b7f8bf9b7ad081c60c604bb4649827" -"checksum crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2449aaa4ec7ef96e5fb24db16024b935df718e9ae1cec0a1e68feeca2efca7b8" -"checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" -"checksum crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c55913cc2799171a550e307918c0a360e8c16004820291bf3b638969b4a01816" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88972de891f6118092b643d85a0b28e0678e0f948d7f879aa32f2d5aafe97d2a" "checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" "checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" -"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865" "checksum hashbrown 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "64b7d419d0622ae02fe5da6b9a5e1964b610a65bb37923b976aeebb6dbb8f86e" -"checksum ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" 
= "36ecfc5ad80f0b1226df948c562e2cddd446096be3f644c95106400eae8a5e01" "checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" "checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" "checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" -"checksum libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d3711dfd91a1081d2458ad2d06ea30a8755256e74038be2ad927d94e1c955ca8" -"checksum linked-hash-map 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7860ec297f7008ff7a1e3382d7f7e1dcd69efc94751a2284bafc3d013c2aa939" -"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" -"checksum lru-cache 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4d06ff7ff06f729ce5f4e227876cb88d10bc59cd4ae1e09fbb2bde15c850dc21" "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" -"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = 
"2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" "checksum nom 4.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9c349f68f25f596b9f44cf0e7c69752a5c633b0550c3ff849518bfba0233774a" "checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" "checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10" "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" -"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" -"checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" -"checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c" -"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" "checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" "checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" "checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd" -"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" "checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = 
"3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" "checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -"checksum rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1961a422c4d189dfb50ffa9320bf1f2a9bd54ecb92792fb9477f99a1045f3372" "checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" "checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" "checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" @@ -951,11 +691,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" "checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" "checksum rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7a5dde55023a6c19470f7aeb59f75f897d8b80cbe00d61dfcaf7bbbe3de4c0a6" -"checksum rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c9d9118f1ce84d8d0b67f9779936432fb42bb620cef2122409d786892cce9a3c" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum same-file 
1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267" "checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" "checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" @@ -964,26 +702,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" "checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" "checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" -"checksum smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b73ea3738b47563803ef814925e69be00799a8c07420be8b996f8e98fb2336db" -"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" -"checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1" "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index e1c83583..edd14616 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -17,9 +17,7 @@ smallbitvec = "2.3.0" clap = "2.32" dirs = "1.0.2" hashbrown = "0.1" -ignore = "0.4.4" libloading = "0.5" -rusqlite = "0.14.0" serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" From 4689cadf9d12f7f2b62bbb393b53ed1df10fc17a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Jan 2019 13:59:37 -0800 Subject: [PATCH 156/208] Make downloaded binary executable in intsall script --- cli/npm/install.js | 4 ++++ cli/npm/package-lock.json | 2 +- cli/npm/package.json | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cli/npm/install.js b/cli/npm/install.js index d73c51cb..9350f682 100755 --- a/cli/npm/install.js +++ b/cli/npm/install.js @@ -51,6 +51,10 @@ get(assetURL, response => { response.pipe(zlib.createGunzip()).pipe(file); }); +file.on('finish', () => { + fs.chmodSync(executableName, '755'); +}); + // Follow redirects. 
function get(url, callback) { https.get(url, response => { diff --git a/cli/npm/package-lock.json b/cli/npm/package-lock.json index b78f1d4e..685806c4 100644 --- a/cli/npm/package-lock.json +++ b/cli/npm/package-lock.json @@ -1,5 +1,5 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta0", + "version": "0.14.0-beta1", "lockfileVersion": 1 } diff --git a/cli/npm/package.json b/cli/npm/package.json index de64c70f..0155c8da 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -14,7 +14,7 @@ ], "main": "lib/api/index.js", "scripts": { - "install": "install.js" + "install": "node install.js" }, "bin": { "tree-sitter": "tree-sitter" From ae07d2d6e4d136e5d7269a4b6c1886e76b520327 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Jan 2019 14:09:19 -0800 Subject: [PATCH 157/208] Build 32-bit executables on 32-bit appveyor builds --- .appveyor.yml | 3 ++- cli/src/loader.rs | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 26ae0691..3de89da7 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -2,7 +2,8 @@ build: false install: # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - rustup-init -yv --default-toolchain stable + - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc + - IF "%PLATFORM%" == "x64" rustup-init -y --default-toolchain stable --default-host x86_64-pc-windows-msvc - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - rustc -vV - cargo -vV diff --git a/cli/src/loader.rs b/cli/src/loader.rs index afb18f9e..7aa0ca50 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -15,6 +15,8 @@ const DYLIB_EXTENSION: &'static str = "so"; #[cfg(windows)] const DYLIB_EXTENSION: &'static str = "dll"; +const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); + struct LanguageRepo { name: String, path: PathBuf, @@ -156,8 +158,8 @@ impl Loader { .cpp(true) .opt_level(2) .cargo_metadata(false) - 
.target(env!("BUILD_TARGET")) - .host(env!("BUILD_TARGET")); + .target(BUILD_TARGET) + .host(BUILD_TARGET); let compiler = config.get_compiler(); let mut command = Command::new(compiler.path()); for (key, value) in compiler.env() { @@ -165,6 +167,9 @@ impl Loader { } if cfg!(windows) { + if !BUILD_TARGET.contains("64") { + command.env("Platform", "x86"); + } command .args(&["/nologo", "/LD", "/I"]) .arg(header_path) From e4b9d9dfa9cd2873df2ea70a059554a44d3d8aa5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 16 Jan 2019 20:56:36 -0800 Subject: [PATCH 158/208] Fix token conflict detection bugs --- cli/src/generate/build_tables/item.rs | 7 ++++--- cli/src/generate/build_tables/minimize_parse_table.rs | 5 +++-- cli/src/generate/build_tables/mod.rs | 2 +- cli/src/loader.rs | 3 --- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 0222ac21..6c74d465 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -236,7 +236,7 @@ impl<'a> ParseItemSet<'a> { Err(i) => { self.entries.insert(i, (item, lookaheads.clone())); &mut self.entries[i].1 - }, + } Ok(i) => { self.entries[i].1.insert_all(lookaheads); &mut self.entries[i].1 @@ -248,8 +248,9 @@ impl<'a> ParseItemSet<'a> { let mut previous_variable_index = u32::MAX; let mut previous_step_index = u32::MAX; for (item, _) in self.entries.iter() { - if item.step().is_none() && item.variable_index != previous_variable_index - || item.step_index != previous_step_index + if item.step().is_some() + && (item.variable_index != previous_variable_index + || item.step_index != previous_step_index) { h.write_u32(item.variable_index); h.write_u32(item.step_index); diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index a5cb5f81..bb9b26eb 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ 
b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -228,8 +228,9 @@ impl<'a> Minimizer<'a> { // Do not add a token if it conflicts with an existing token. if token.is_terminal() { for existing_token in state.terminal_entries.keys() { - if (is_word_token && self.keywords.contains(existing_token)) - || is_keyword && self.syntax_grammar.word_token.as_ref() == Some(existing_token) + if (is_word_token || is_keyword) + && (self.keywords.contains(existing_token) + || self.syntax_grammar.word_token.as_ref() == Some(existing_token)) { continue; } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 28b18109..7811176b 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -232,7 +232,7 @@ fn identify_keywords( .filter(|token| { for other_token in keywords.iter() { if other_token != *token - && token_conflict_map.does_match_same_string(token.index, other_token.index) + && token_conflict_map.does_match_same_string(other_token.index, token.index) { info!( "Keywords - exclude {} because it matches the same string as {}", diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 7aa0ca50..70056404 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -167,9 +167,6 @@ impl Loader { } if cfg!(windows) { - if !BUILD_TARGET.contains("64") { - command.env("Platform", "x86"); - } command .args(&["/nologo", "/LD", "/I"]) .arg(header_path) From d903371709c8ae442c7ec162e9ea227ce59a2863 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 10:07:58 -0800 Subject: [PATCH 159/208] Remove noisy logging --- cli/src/generate/build_tables/build_lex_table.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 03ec0c7b..38f56cc3 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -184,21 +184,13 @@ impl<'a> 
LexTableBuilder<'a> { completion = Some((id, prec)); } - info!( - "lex state: {}, completion: {:?}", - state_id, - completion.map(|(id, prec)| (&self.lexical_grammar.variables[id].name, prec)) - ); - let transitions = self.cursor.transitions(); let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep); - info!("lex state: {}, transitions: {:?}", state_id, transitions); // If EOF is a valid lookahead token, add a transition predicated on the null // character that leads to the empty set of NFA states. if eof_valid { let (next_state_id, _) = self.add_state(Vec::new(), false); - info!("lex state: {}, successor: EOF", state_id); self.table.states[state_id].advance_actions.push(( CharacterSet::empty().add_char('\0'), AdvanceAction { From d52a11fd03a7d348275422990fb4ab8fc23ca2fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 10:09:03 -0800 Subject: [PATCH 160/208] Avoid using a string literal to pass grammar path to JS Backslashes in windows path were getting interpeted as escape characters. 
--- cli/src/generate/dsl.js | 3 +++ cli/src/generate/mod.rs | 11 +++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index fa60dfa7..950b2d3b 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -327,3 +327,6 @@ global.seq = seq; global.sym = sym; global.token = token; global.grammar = grammar; + +const result = require(process.env.TREE_SITTER_GRAMMAR_PATH); +console.log(JSON.stringify(result, null, 2)); diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index f42dff96..baaeb182 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -104,23 +104,18 @@ fn load_grammar_file(grammar_path: &PathBuf) -> String { fn load_js_grammar_file(grammar_path: &PathBuf) -> String { let mut node_process = Command::new("node") + .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() .expect("Failed to run `node`"); - let js_prelude = include_str!("./dsl.js"); let mut node_stdin = node_process .stdin .take() .expect("Failed to open stdin for node"); - write!( - node_stdin, - "{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n", - js_prelude, - grammar_path.to_str().unwrap() - ) - .expect("Failed to write to node's stdin"); + let javascript_code = include_bytes!("./dsl.js"); + node_stdin.write(javascript_code).expect("Failed to write to node's stdin"); drop(node_stdin); let output = node_process .wait_with_output() From 3d11388cd10d2e69191a6e255e47afce16eeab69 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 12:40:21 -0800 Subject: [PATCH 161/208] Fix test subcommand bugs * Log session was dropped before the parser * Whitespace between close parens was not stripped --- cli/src/test.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/test.rs b/cli/src/test.rs index 4d6034e5..3a40eb83 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -44,12 +44,12 @@ pub fn 
run_tests_at_path( filter: Option<&str>, ) -> Result<()> { let test_entry = parse_tests(path)?; - let mut log_session = None; + let mut _log_session = None; let mut parser = Parser::new(); parser.set_language(language)?; if debug_graph { - log_session = Some(util::log_graphs(&mut parser, "log.html")?); + _log_session = Some(util::log_graphs(&mut parser, "log.html")?); } else if debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { @@ -82,7 +82,6 @@ pub fn run_tests_at_path( } } - drop(log_session); Ok(()) } @@ -200,6 +199,7 @@ fn parse_test_content(name: String, content: String) -> TestEntry { if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) { let input = bytes[previous_header_end..divider_start].to_vec(); let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); + let output = output.replace(" )", ")"); children.push(TestEntry::Example { name: previous_name, input, From 9f7079c9c50abd43cceda31e22b6871ac4db6847 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 12:44:14 -0800 Subject: [PATCH 162/208] Ensure that the word token has a low numerical index Fixes https://github.com/tree-sitter/tree-sitter/issues/258 --- cli/src/generate/build_tables/mod.rs | 10 +++++----- cli/src/generate/prepare_grammar/extract_tokens.rs | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 7811176b..3d7b6fd0 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -172,17 +172,17 @@ fn populate_used_symbols( non_terminal_usages[symbol.index] = true; } } - for (i, value) in external_usages.into_iter().enumerate() { - if value { - parse_table.symbols.push(Symbol::external(i)); - } - } parse_table.symbols.push(Symbol::end()); for (i, value) in terminal_usages.into_iter().enumerate() { if value { parse_table.symbols.push(Symbol::terminal(i)); } } + for 
(i, value) in external_usages.into_iter().enumerate() { + if value { + parse_table.symbols.push(Symbol::external(i)); + } + } for (i, value) in non_terminal_usages.into_iter().enumerate() { if value { parse_table.symbols.push(Symbol::non_terminal(i)); diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index ae07763b..72df21b2 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -15,6 +15,16 @@ pub(super) fn extract_tokens( extracted_usage_counts: Vec::new(), }; + // Extract the word token first to give it a low numerical index. This ensure that + // it can be stored in a subtree with no heap allocations, even for grammars with + // very large numbers of tokens. This is an optimization, but also important to + // ensure that a subtree's symbol can be successfully reassigned to the word token + // without having to move the subtree to the heap. + // See https://github.com/tree-sitter/tree-sitter/issues/258 + if let Some(token) = grammar.word_token { + extractor.extract_tokens_in_variable(&mut grammar.variables[token.index]); + } + for mut variable in grammar.variables.iter_mut() { extractor.extract_tokens_in_variable(&mut variable); } From bb5dedfb1e43440267cab845b94e78ba5fedbaa3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 12:44:35 -0800 Subject: [PATCH 163/208] Fix another token conflict detection bug --- cli/src/generate/build_tables/token_conflicts.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 13c69c19..1f89022a 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -286,6 +286,9 @@ fn compute_conflict_status( if transition.characters.does_intersect(&following_chars[i]) { result.1.does_match_valid_continuation = true; } + if 
transition.is_separator || has_sep { + result.1.does_match_separators = true; + } } } } From 8f4096e5cb20c508ceae368bcdbe69b72244281f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 12:50:30 -0800 Subject: [PATCH 164/208] Give more informative error messages when failing to write files --- cli/src/generate/mod.rs | 38 ++++++++++++++++++++-------------- cli/src/generate/properties.rs | 4 +++- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index baaeb182..062a9e6b 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -2,7 +2,7 @@ use self::build_tables::build_tables; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; -use crate::error::Result; +use crate::error::{Error, Result}; use regex::{Regex, RegexBuilder}; use std::fs; use std::io::Write; @@ -41,19 +41,17 @@ pub fn generate_parser_in_directory( generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; let repo_src_path = repo_path.join("src"); fs::create_dir_all(&repo_src_path)?; - fs::write(&repo_src_path.join("parser.c"), c_code)?; - let binding_cc_path = repo_src_path.join("binding.cc"); - if !binding_cc_path.exists() { - fs::write(&binding_cc_path, npm_files::binding_cc(&language_name))?; - } - let binding_gyp_path = repo_path.join("binding.gyp"); - if !binding_gyp_path.exists() { - fs::write(&binding_gyp_path, npm_files::binding_gyp(&language_name))?; - } - let index_js_path = repo_path.join("index.js"); - if !index_js_path.exists() { - fs::write(&index_js_path, npm_files::index_js(&language_name))?; - } + fs::write(&repo_src_path.join("parser.c"), c_code) + .map_err(|e| format!("Failed to write parser.c: {}", e))?; + ensure_file(&repo_src_path.join("binding.cc"), || { + npm_files::binding_cc(&language_name) + })?; + ensure_file(&repo_path.join("binding.gyp"), || { + npm_files::binding_gyp(&language_name) + })?; + 
ensure_file(&repo_path.join("index.js"), || { + npm_files::index_js(&language_name) + })?; } properties::generate_property_sheets(repo_path)?; Ok(()) @@ -115,7 +113,9 @@ fn load_js_grammar_file(grammar_path: &PathBuf) -> String { .take() .expect("Failed to open stdin for node"); let javascript_code = include_bytes!("./dsl.js"); - node_stdin.write(javascript_code).expect("Failed to write to node's stdin"); + node_stdin + .write(javascript_code) + .expect("Failed to write to node's stdin"); drop(node_stdin); let output = node_process .wait_with_output() @@ -128,3 +128,11 @@ fn load_js_grammar_file(grammar_path: &PathBuf) -> String { String::from_utf8(output.stdout).expect("Got invalid UTF8 from node") } + +fn ensure_file(path: &PathBuf, f: impl Fn() -> String) -> Result<()> { + if path.exists() { + Ok(()) + } else { + fs::write(path, f()).map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) + } +} diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index b16e698a..bf299af4 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -432,7 +432,9 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { let property_sheet_json_path = src_dir_path .join(css_path.file_name().unwrap()) .with_extension("json"); - let property_sheet_json_file = File::create(property_sheet_json_path)?; + let property_sheet_json_file = File::create(&property_sheet_json_path).map_err(|e| + format!("Failed to create {:?}: {}", property_sheet_json_path, e) + )?; let mut writer = BufWriter::new(property_sheet_json_file); serde_json::to_writer_pretty(&mut writer, &sheet)?; } From c27f776d418c3413b907435d0e8fe5a86f99f7db Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 12:50:30 -0800 Subject: [PATCH 165/208] Fix word token index issue in a different way Refs https://github.com/tree-sitter/tree-sitter/issues/258 --- cli/src/generate/build_tables/mod.rs | 12 +++++++++++- 
cli/src/generate/prepare_grammar/extract_tokens.rs | 10 ---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 3d7b6fd0..92fddefe 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -175,7 +175,17 @@ fn populate_used_symbols( parse_table.symbols.push(Symbol::end()); for (i, value) in terminal_usages.into_iter().enumerate() { if value { - parse_table.symbols.push(Symbol::terminal(i)); + // Assign the grammar's word token a low numerical index. This ensures that + // it can be stored in a subtree with no heap allocations, even for grammars with + // very large numbers of tokens. This is an optimization, but it's also important to + // ensure that a subtree's symbol can be successfully reassigned to the word token + // without having to move the subtree to the heap. + // See https://github.com/tree-sitter/tree-sitter/issues/258 + if syntax_grammar.word_token.map_or(false, |t| t.index == i) { + parse_table.symbols.insert(1, Symbol::terminal(i)); + } else { + parse_table.symbols.push(Symbol::terminal(i)); + } } } for (i, value) in external_usages.into_iter().enumerate() { diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index 72df21b2..ae07763b 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -15,16 +15,6 @@ pub(super) fn extract_tokens( extracted_usage_counts: Vec::new(), }; - // Extract the word token first to give it a low numerical index. This ensure that - // it can be stored in a subtree with no heap allocations, even for grammars with - // very large numbers of tokens. This is an optimization, but also important to - // ensure that a subtree's symbol can be successfully reassigned to the word token - // without having to move the subtree to the heap. 
- // See https://github.com/tree-sitter/tree-sitter/issues/258 - if let Some(token) = grammar.word_token { - extractor.extract_tokens_in_variable(&mut grammar.variables[token.index]); - } - for mut variable in grammar.variables.iter_mut() { extractor.extract_tokens_in_variable(&mut variable); } From 64fa721779e6b69c6d019db68505add1e9cc34a7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 13:36:35 -0800 Subject: [PATCH 166/208] Don't skip branch builds on appveyor --- .appveyor.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3de89da7..4a6721ad 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -25,11 +25,6 @@ test_script: - set TREE_SITTER_TEST=1 - script\test.cmd -branches: - only: - - master - - /\d+\.\d+\.\d+.*/ - before_deploy: - move target\release\tree-sitter.exe tree-sitter.exe - 7z a -tgzip tree-sitter-windows-%PLATFORM%.gz tree-sitter.exe From 06cb829d37ebd5b975483f67d39dd62236908102 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 13:44:22 -0800 Subject: [PATCH 167/208] Try another way of building only tags and PRs on appveyor --- .appveyor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.appveyor.yml b/.appveyor.yml index 4a6721ad..3d6b7bd7 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,5 +1,8 @@ build: false install: + # Terminate early unless building either a tag or a PR. 
+ - if not defined APPVEYOR_REPO_TAG if not defined APPVEYOR_PULL_REQUEST_NUMBER appveyor exit + # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - IF "%PLATFORM%" == "x86" rustup-init -y --default-toolchain stable --default-host i686-pc-windows-msvc From 14ecec1d4f7b5f99c18308fc394adcad8018c95a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 13:55:18 -0800 Subject: [PATCH 168/208] Fix early termination on appveyor --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3d6b7bd7..7d4acdc7 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,7 +1,7 @@ build: false install: # Terminate early unless building either a tag or a PR. - - if not defined APPVEYOR_REPO_TAG if not defined APPVEYOR_PULL_REQUEST_NUMBER appveyor exit + - if not defined APPVEYOR_REPO_TAG if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe From 53c8eaa4c2ad55daef39d877b4fbeb8daa42b162 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 15:15:34 -0800 Subject: [PATCH 169/208] Create a wrapper script for npm package to fix npm install issues --- cli/npm/.gitignore | 1 + cli/npm/cli.js | 12 ++++++++++++ cli/npm/package.json | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100755 cli/npm/cli.js diff --git a/cli/npm/.gitignore b/cli/npm/.gitignore index f0475945..2d3aa23a 100644 --- a/cli/npm/.gitignore +++ b/cli/npm/.gitignore @@ -1,3 +1,4 @@ tree-sitter tree-sitter.exe *.gz +*.tgz diff --git a/cli/npm/cli.js b/cli/npm/cli.js new file mode 100755 index 00000000..404739fa --- /dev/null +++ b/cli/npm/cli.js @@ -0,0 +1,12 @@ +#!/usr/bin/env node + +const path = require('path'); +const spawn = require("child_process").spawn; +const executable = process.platform === 'win32' + ? 
'tree-sitter.exe' + : 'tree-sitter'; +spawn( + path.join(__dirname, executable), + process.argv.slice(2), + {stdio: 'inherit'} +).on('close', process.exit) diff --git a/cli/npm/package.json b/cli/npm/package.json index 0155c8da..230676f1 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -17,6 +17,6 @@ "install": "node install.js" }, "bin": { - "tree-sitter": "tree-sitter" + "tree-sitter": "cli.js" } } From c204b5e72837fd5a0fb7aeb09e3c1af5080a4604 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 15:15:40 -0800 Subject: [PATCH 170/208] Print help/version info when run w/ no subcommand --- cli/build.rs | 26 ++++++++++++++++++++++++++ cli/src/main.rs | 6 ++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index e0ebd1c4..f8e62274 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,6 +1,32 @@ +use std::{io, env, fs}; + fn main() { + let git_sha = read_git_sha().unwrap(); + println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); + println!( "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); } + +fn read_git_sha() -> io::Result { + let git_path = env::current_dir().unwrap().parent().unwrap().join(".git"); + let git_head_path = git_path.join("HEAD"); + println!("cargo:rerun-if-changed={}", git_head_path.to_str().unwrap()); + let mut head_content = fs::read_to_string(&git_head_path)?; + assert!(head_content.ends_with("\n")); + head_content.pop(); + + if head_content.starts_with("ref: ") { + // We're on a branch. Read the SHA from the ref file. + head_content.replace_range(0.."ref: ".len(), ""); + let ref_filename = git_path.join(&head_content); + println!("cargo:rerun-if-changed={}", ref_filename.to_str().unwrap()); + fs::read_to_string(&ref_filename) + } else { + // We're not on a branch. The `HEAD` file itself contains the sha. 
+ assert_eq!(head_content.len(), 40); + Ok(head_content) + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs index 80a40758..1860ecc2 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -21,7 +21,7 @@ mod util; mod tests; use self::loader::Loader; -use clap::{App, Arg, SubCommand}; +use clap::{App, AppSettings, Arg, SubCommand}; use std::env; use std::fs; use std::path::Path; @@ -37,7 +37,8 @@ fn main() { fn run() -> error::Result<()> { let matches = App::new("tree-sitter") - .version("0.1") + .version(concat!(env!("CARGO_PKG_VERSION"), " (", env!("BUILD_SHA"), ")")) + .setting(AppSettings::SubcommandRequiredElseHelp) .author("Max Brunsfeld ") .about("Generates and tests parsers") .subcommand( @@ -77,6 +78,7 @@ fn run() -> error::Result<()> { let home_dir = dirs::home_dir().unwrap(); let current_dir = env::current_dir().unwrap(); let config_dir = home_dir.join(".tree-sitter"); + fs::create_dir_all(&config_dir).unwrap(); let mut loader = Loader::new(config_dir); From 652eb3bbb62cf883a05c23a12e50df6caf92a45b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 15:17:55 -0800 Subject: [PATCH 171/208] 0.14.0-beta2 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package-lock.json | 2 +- cli/npm/package.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2edecfc7..f27e897e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -567,7 +567,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.14.0-beta1" +version = "0.14.0-beta2" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index edd14616..b7cd21d2 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tree-sitter-cli" -version = "0.14.0-beta1" +version = "0.14.0-beta2" authors = ["Max Brunsfeld "] edition = "2018" diff --git a/cli/npm/package-lock.json 
b/cli/npm/package-lock.json index 685806c4..ff76b456 100644 --- a/cli/npm/package-lock.json +++ b/cli/npm/package-lock.json @@ -1,5 +1,5 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta1", + "version": "0.14.0-beta2", "lockfileVersion": 1 } diff --git a/cli/npm/package.json b/cli/npm/package.json index 230676f1..9dfd5e7e 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta1", + "version": "0.14.0-beta2", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 71357afb2fa55a7b35402bdbca9cc1777d2559e4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 15:59:12 -0800 Subject: [PATCH 172/208] Add version script --- script/version | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100755 script/version diff --git a/script/version b/script/version new file mode 100755 index 00000000..4373dcdb --- /dev/null +++ b/script/version @@ -0,0 +1,49 @@ +#!/usr/bin/env node + +const fs = require('fs'); +const path = require('path'); +const {execFileSync} = require('child_process'); + +const cliPath = path.join(__dirname, '..', 'cli'); +const npmPath = path.join(cliPath, 'npm'); +const cargoTomlPath = path.join(cliPath, 'Cargo.toml'); + +const npmMetadata = require(path.join(npmPath, 'package.json')); +const npmVersion = npmMetadata.version; + +const cargoMetadata = fs.readFileSync(cargoTomlPath, 'utf8') +const cargoVersionMatch = cargoMetadata.match(/version = "([^"\n]+)"/); +const cargoVersion = cargoVersionMatch[1]; + +if (npmVersion !== cargoVersion) { + console.error(`NPM version ${npmVersion} does not match Cargo version ${cargoVersion}`); + process.exit(1); +} + +if (process.argv[2]) { + // Check that working directory is clean + const diff = execFileSync( + 'git', + ['diff', '--stat'], + {encoding: 'utf8'} + ); + if (diff.length !== 0) { + console.error('There are uncommited changes.'); + process.exit(1); + } + + 
const newVersion = execFileSync( + 'npm', + ['version', process.argv[2], '--git-tag-version=false'], + {cwd: npmPath, encoding: 'utf8'} + ).trim().replace(/^v/, ''); + const newCargoVersionLine = cargoVersionMatch[0].replace(cargoVersion, newVersion); + const newCargoMetadata = cargoMetadata.replace(cargoVersionMatch[0], newCargoVersionLine); + fs.writeFileSync(cargoTomlPath, newCargoMetadata, 'utf8'); + execFileSync('cargo', ['build'], {cwd: cliPath}); + execFileSync('git', ['commit', '-a', '-m', newVersion]); + execFileSync('git', ['tag', newVersion]); + console.log(newVersion) +} else { + console.log(npmVersion); +} From cbcc61a8cf3f59b1d6fadc106472b7f1cbf378a8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 17:15:10 -0800 Subject: [PATCH 173/208] Get parse command handling multiple files, add --time, --quiet flags --- cli/src/loader.rs | 8 +-- cli/src/main.rs | 51 +++++++++++++--- cli/src/parse.rs | 147 ++++++++++++++++++++++++++++++--------------- lib/binding/lib.rs | 13 ++++ 4 files changed, 157 insertions(+), 62 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 70056404..6dd4e4db 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -56,12 +56,8 @@ impl Loader { let entry = entry?; if let Some(parser_dir_name) = entry.file_name().to_str() { if parser_dir_name.starts_with("tree-sitter-") { - if self - .find_language_at_path(&parser_container_dir.join(parser_dir_name)) - .is_err() - { - eprintln!("Error loading {}", parser_dir_name); - } + self.find_language_at_path(&parser_container_dir.join(parser_dir_name)) + .ok(); } } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 1860ecc2..aaf45cb1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -37,7 +37,12 @@ fn main() { fn run() -> error::Result<()> { let matches = App::new("tree-sitter") - .version(concat!(env!("CARGO_PKG_VERSION"), " (", env!("BUILD_SHA"), ")")) + .version(concat!( + env!("CARGO_PKG_VERSION"), + " (", + env!("BUILD_SHA"), + ")" + )) 
.setting(AppSettings::SubcommandRequiredElseHelp) .author("Max Brunsfeld ") .about("Generates and tests parsers") @@ -57,9 +62,16 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("parse") .about("Parse a file") - .arg(Arg::with_name("path").index(1).required(true)) + .arg( + Arg::with_name("path") + .index(1) + .multiple(true) + .required(true), + ) .arg(Arg::with_name("debug").long("debug").short("d")) - .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), + .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("time").short("t")), ) .subcommand( SubCommand::with_name("test") @@ -116,12 +128,35 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("parse") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); + let quiet = matches.is_present("quiet"); + let time = matches.is_present("time"); loader.find_all_languages(&vec![home_dir.join("github")])?; - let source_path = Path::new(matches.value_of("path").unwrap()); - if let Some((language, _)) = loader.language_configuration_for_file_name(source_path)? { - parse::parse_file_at_path(language, source_path, debug, debug_graph)?; - } else { - eprintln!("No language found"); + let paths = matches + .values_of("path") + .unwrap() + .into_iter() + .collect::>(); + let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); + for path in paths { + let path = Path::new(path); + let language = + if let Some((l, _)) = loader.language_configuration_for_file_name(path)? { + l + } else if let Some(l) = loader.language_at_path(¤t_dir)? 
{ + l + } else { + eprintln!("No language found"); + return Ok(()); + }; + parse::parse_file_at_path( + language, + path, + max_path_length, + quiet, + time, + debug, + debug_graph, + )?; } } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 38b6a61c..54c02ad2 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -3,21 +3,25 @@ use super::util; use std::fs; use std::io::{self, Write}; use std::path::Path; +use std::time::Instant; use tree_sitter::{Language, LogType, Parser}; pub fn parse_file_at_path( language: Language, path: &Path, + max_path_length: usize, + quiet: bool, + print_time: bool, debug: bool, debug_graph: bool, ) -> Result<()> { - let mut log_session = None; + let mut _log_session = None; let mut parser = Parser::new(); parser.set_language(language)?; - let source_code = fs::read_to_string(path)?; + let source_code = fs::read(path)?; if debug_graph { - log_session = Some(util::log_graphs(&mut parser, "log.html")?); + _log_session = Some(util::log_graphs(&mut parser, "log.html")?); } else if debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { @@ -27,64 +31,111 @@ pub fn parse_file_at_path( }))); } + let time = Instant::now(); let tree = parser - .parse_str(&source_code, None) + .parse_utf8(&mut |byte, _| &source_code[byte..], None) .expect("Incompatible language version"); + let duration = time.elapsed(); + let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; - drop(log_session); + let mut cursor = tree.walk(); let stdout = io::stdout(); let mut stdout = stdout.lock(); - let mut cursor = tree.walk(); - let mut needs_newline = false; - let mut indent_level = 0; - let mut did_visit_children = false; + + if !quiet { + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + stdout.write(b")")?; + needs_newline = true; + } 
+ if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + let start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "({} [{}, {}] - [{}, {}]", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } + cursor.reset(tree.root_node()); + println!(""); + } + + let mut first_error = None; loop { let node = cursor.node(); - let is_named = node.is_named(); - if did_visit_children { - if is_named { - stdout.write(b")")?; - needs_newline = true; - } - if cursor.goto_next_sibling() { - did_visit_children = false; - } else if cursor.goto_parent() { - did_visit_children = true; - indent_level -= 1; - } else { + if node.has_error() { + if node.is_error() || node.is_missing() { + first_error = Some(node); break; - } - } else { - if is_named { - if needs_newline { - stdout.write(b"\n")?; - } - for _ in 0..indent_level { - stdout.write(b" ")?; - } - let start = node.start_position(); - let end = node.end_position(); - write!( - &mut stdout, - "({} [{}, {}] - [{}, {}]", - node.kind(), - start.row, - start.column, - end.row, - end.column - )?; - needs_newline = true; - } - if cursor.goto_first_child() { - did_visit_children = false; - indent_level += 1; } else { - did_visit_children = true; + cursor.goto_first_child(); + } + } else if !cursor.goto_next_sibling() { + if !cursor.goto_parent() { + break; } } } - println!(""); + if first_error.is_some() || print_time { + write!( + &mut stdout, + "{:width$}\t{} ms", + path.to_str().unwrap(), + duration_ms, + width = max_path_length + )?; + if let Some(node) = first_error { + let 
start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "\t({} [{}, {}] - [{}, {}]", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; + } + write!(&mut stdout, "\n")?; + } + Ok(()) } diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index fdb243ec..8143fd6b 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -19,6 +19,7 @@ use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; use std::str; +use std::u16; #[derive(Clone, Copy)] #[repr(transparent)] @@ -479,6 +480,14 @@ impl<'tree> Node<'tree> { unsafe { ffi::ts_node_has_error(self.0) } } + pub fn is_error(&self) -> bool { + self.kind_id() == u16::MAX + } + + pub fn is_missing(&self) -> bool { + unsafe { ffi::ts_node_is_missing(self.0) } + } + pub fn start_byte(&self) -> usize { unsafe { ffi::ts_node_start_byte(self.0) as usize } } @@ -622,6 +631,10 @@ impl<'a> TreeCursor<'a> { Some(result as usize) } } + + pub fn reset(&mut self, node: Node<'a>) { + unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; + } } impl<'a> Drop for TreeCursor<'a> { From ed195de8b68476572c72aa60d6da83d83e2dfd33 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 17:16:04 -0800 Subject: [PATCH 174/208] rustfmt --- cli/build.rs | 2 +- .../build_tables/build_parse_table.rs | 94 ++++++----- .../generate/prepare_grammar/expand_tokens.rs | 16 +- .../prepare_grammar/extract_simple_aliases.rs | 147 ++++++++++-------- .../prepare_grammar/extract_tokens.rs | 2 +- .../prepare_grammar/flatten_grammar.rs | 2 +- .../prepare_grammar/intern_symbols.rs | 4 +- cli/src/generate/properties.rs | 6 +- cli/src/generate/render.rs | 4 +- cli/src/generate/rules.rs | 5 +- cli/src/tests/corpuses.rs | 2 +- cli/src/tests/fixtures.rs | 9 +- cli/src/tests/mod.rs | 2 +- cli/src/tests/parser_api.rs | 2 +- cli/src/util.rs | 16 +- 15 files changed, 170 insertions(+), 143 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index f8e62274..b24eef82 100644 --- 
a/cli/build.rs +++ b/cli/build.rs @@ -1,4 +1,4 @@ -use std::{io, env, fs}; +use std::{env, fs, io}; fn main() { let git_sha = read_git_sha().unwrap(); diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 792a8759..bd790b29 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -456,60 +456,68 @@ impl<'a> ParseTableBuilder<'a> { .unwrap(); write!(&mut msg, "Possible interpretations:\n\n").unwrap(); - let interpretions = conflicting_items.iter().enumerate().map(|(i, item)| { - let mut line = String::new(); - write!(&mut line, " {}:", i + 1).unwrap(); + let interpretions = conflicting_items + .iter() + .enumerate() + .map(|(i, item)| { + let mut line = String::new(); + write!(&mut line, " {}:", i + 1).unwrap(); - for preceding_symbol in preceding_symbols - .iter() - .take(preceding_symbols.len() - item.step_index as usize) - { - write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap(); - } - - write!( - &mut line, - " ({}", - &self.syntax_grammar.variables[item.variable_index as usize].name - ) - .unwrap(); - - for (j, step) in item.production.steps.iter().enumerate() { - if j as u32 == item.step_index { - write!(&mut line, " •").unwrap(); + for preceding_symbol in preceding_symbols + .iter() + .take(preceding_symbols.len() - item.step_index as usize) + { + write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap(); } - write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap(); - } - write!(&mut line, ")").unwrap(); - - if item.is_done() { write!( &mut line, - " • {} …", - self.symbol_name(&conflicting_lookahead) + " ({}", + &self.syntax_grammar.variables[item.variable_index as usize].name ) .unwrap(); - } - let precedence = item.precedence(); - let associativity = item.associativity(); + for (j, step) in item.production.steps.iter().enumerate() { + if j as u32 == item.step_index { + write!(&mut 
line, " •").unwrap(); + } + write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap(); + } - let prec_line = if let Some(associativity) = associativity { - Some(format!( - "(precedence: {}, associativity: {:?})", - precedence, associativity - )) - } else if precedence > 0 { - Some(format!("(precedence: {})", precedence)) - } else { - None - }; + write!(&mut line, ")").unwrap(); - (line, prec_line) - }).collect::>(); + if item.is_done() { + write!( + &mut line, + " • {} …", + self.symbol_name(&conflicting_lookahead) + ) + .unwrap(); + } - let max_interpretation_length = interpretions.iter().map(|i| i.0.chars().count()).max().unwrap(); + let precedence = item.precedence(); + let associativity = item.associativity(); + + let prec_line = if let Some(associativity) = associativity { + Some(format!( + "(precedence: {}, associativity: {:?})", + precedence, associativity + )) + } else if precedence > 0 { + Some(format!("(precedence: {})", precedence)) + } else { + None + }; + + (line, prec_line) + }) + .collect::>(); + + let max_interpretation_length = interpretions + .iter() + .map(|i| i.0.chars().count()) + .max() + .unwrap(); for (line, prec_suffix) in interpretions { msg += &line; diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 6b92713e..8e0f12fe 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -3,14 +3,15 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{LexicalGrammar, LexicalVariable}; use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; use crate::generate::rules::Rule; +use regex::Regex; use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, }; -use regex::Regex; use std::i32; lazy_static! 
{ - static ref CURLY_BRACE_REGEX: Regex = Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap(); + static ref CURLY_BRACE_REGEX: Regex = + Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap(); } const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; @@ -621,14 +622,9 @@ mod tests { }, // nested groups Row { - rules: vec![Rule::seq(vec![ - Rule::pattern(r#"([^x\\]|\\(.|\n))+"#), - ])], + rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])], separators: vec![], - examples: vec![ - ("abcx", Some((0, "abc"))), - ("abc\\0x", Some((0, "abc\\0"))), - ], + examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))], }, // allowing unrecognized escape sequences Row { @@ -660,7 +656,7 @@ mod tests { ("u{1234} ok", Some((0, "u{1234}"))), ("{aba}}", Some((1, "{aba}"))), ], - } + }, ]; for Row { diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs index 84c535b9..79ea5e67 100644 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs @@ -1,5 +1,5 @@ -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; #[derive(Clone, Default)] struct SymbolStatus { @@ -9,20 +9,34 @@ struct SymbolStatus { pub(super) fn extract_simple_aliases( syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: &LexicalGrammar + lexical_grammar: &LexicalGrammar, ) -> AliasMap { // Determine which symbols in the grammars are *always* aliased to a single name. 
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - let mut non_terminal_status_list = vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - let mut external_status_list = vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + let mut non_terminal_status_list = + vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = + vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; for variable in syntax_grammar.variables.iter() { for production in variable.productions.iter() { for step in production.steps.iter() { let mut status = match step.symbol { - Symbol { kind: SymbolType::External, index} => &mut external_status_list[index], - Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index], - Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index], - Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"), + Symbol { + kind: SymbolType::External, + index, + } => &mut external_status_list[index], + Symbol { + kind: SymbolType::NonTerminal, + index, + } => &mut non_terminal_status_list[index], + Symbol { + kind: SymbolType::Terminal, + index, + } => &mut terminal_status_list[index], + Symbol { + kind: SymbolType::End, + .. + } => panic!("Unexpected end token"), }; if step.alias.is_none() { @@ -47,10 +61,22 @@ pub(super) fn extract_simple_aliases( for production in variable.productions.iter_mut() { for step in production.steps.iter_mut() { let status = match step.symbol { - Symbol { kind: SymbolType::External, index} => &external_status_list[index], - Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index], - Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index], - Symbol { kind: SymbolType::End, .. 
} => panic!("Unexpected end token"), + Symbol { + kind: SymbolType::External, + index, + } => &external_status_list[index], + Symbol { + kind: SymbolType::NonTerminal, + index, + } => &non_terminal_status_list[index], + Symbol { + kind: SymbolType::Terminal, + index, + } => &terminal_status_list[index], + Symbol { + kind: SymbolType::End, + .. + } => panic!("Unexpected end token"), }; if status.alias.is_some() { @@ -83,7 +109,9 @@ pub(super) fn extract_simple_aliases( #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep}; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; use crate::generate::nfa::Nfa; #[test] @@ -93,35 +121,29 @@ mod tests { SyntaxVariable { name: "v1".to_owned(), kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - }, - ], + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ], + }], }, SyntaxVariable { name: "v2".to_owned(), kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ - // Token 0 is always aliased as "a1". - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - - // Token 1 is aliased above, but not here. - ProductionStep::new(Symbol::terminal(1)), - - // Token 2 is aliased differently than above. 
- ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - }, - ], + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + // Token 1 is aliased above, but not here. + ProductionStep::new(Symbol::terminal(1)), + // Token 2 is aliased differently than above. + ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), + ], + }], }, ], extra_tokens: Vec::new(), @@ -151,49 +173,50 @@ mod tests { kind: VariableType::Anonymous, implicit_precedence: 0, start_state: 0, - } + }, ], }; let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); assert_eq!(simple_aliases.len(), 1); - assert_eq!(simple_aliases[&Symbol::terminal(0)], Alias { - value: "a1".to_string(), - is_named: true, - }); + assert_eq!( + simple_aliases[&Symbol::terminal(0)], + Alias { + value: "a1".to_string(), + is_named: true, + } + ); - assert_eq!(syntax_grammar.variables, vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![ - Production { + assert_eq!( + syntax_grammar.variables, + vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { dynamic_precedence: 0, steps: vec![ // 'Simple' alias removed ProductionStep::new(Symbol::terminal(0)), - // Other aliases unchanged ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), ], - }, - ], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![ - Production { + },], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(0)), ProductionStep::new(Symbol::terminal(1)), ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), ], - }, - ], - }, 
- ]); + },], + }, + ] + ); } } diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index ae07763b..88afb50f 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -243,7 +243,7 @@ impl TokenExtractor { Variable { name: string_value.clone(), kind: VariableType::Anonymous, - rule: rule.clone() + rule: rule.clone(), } } else { self.current_variable_token_count += 1; diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 204ceb07..98276b7e 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -1,9 +1,9 @@ use super::ExtractedSyntaxGrammar; use crate::error::{Error, Result}; -use crate::generate::rules::Symbol; use crate::generate::grammars::{ Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable, }; +use crate::generate::rules::Symbol; use crate::generate::rules::{Alias, Associativity, Rule}; struct RuleFlattener { diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 8b07309b..d742864c 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -7,9 +7,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result let interner = Interner { grammar }; if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden { - return Err(Error( - "A grammar's start rule must be visible.".to_string(), - )); + return Err(Error("A grammar's start rule must be visible.".to_string())); } let mut variables = Vec::with_capacity(grammar.variables.len()); diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index bf299af4..4df4d67d 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -178,7 +178,6 
@@ impl Builder { text: step.text_pattern.clone(), state_id: 0, }, - // Include the rule id so that it can be used when sorting transitions. item.rule_id, )); @@ -432,9 +431,8 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { let property_sheet_json_path = src_dir_path .join(css_path.file_name().unwrap()) .with_extension("json"); - let property_sheet_json_file = File::create(&property_sheet_json_path).map_err(|e| - format!("Failed to create {:?}: {}", property_sheet_json_path, e) - )?; + let property_sheet_json_file = File::create(&property_sheet_json_path) + .map_err(|e| format!("Failed to create {:?}: {}", property_sheet_json_path, e))?; let mut writer = BufWriter::new(property_sheet_json_file); serde_json::to_writer_pretty(&mut writer, &sheet)?; } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 1da7f99d..5e87189c 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -563,7 +563,9 @@ impl Generator { indent!(self); for i in 0..self.syntax_grammar.external_tokens.len() { let token = &self.syntax_grammar.external_tokens[i]; - let id_token = token.corresponding_internal_token.unwrap_or(Symbol::external(i)); + let id_token = token + .corresponding_internal_token + .unwrap_or(Symbol::external(i)); add_line!( self, "[{}] = {},", diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index e15070ea..09a20294 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -59,10 +59,7 @@ pub(crate) enum Rule { impl Rule { pub fn alias(content: Rule, value: String, is_named: bool) -> Self { add_metadata(content, move |params| { - params.alias = Some(Alias { - is_named, - value - }); + params.alias = Some(Alias { is_named, value }); }) } diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index 173426d6..b28b2510 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -1,4 +1,4 @@ -use super::fixtures::{get_language, get_test_language, fixtures_dir}; 
+use super::fixtures::{fixtures_dir, get_language, get_test_language}; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; diff --git a/cli/src/tests/fixtures.rs b/cli/src/tests/fixtures.rs index 978a1212..639b1004 100644 --- a/cli/src/tests/fixtures.rs +++ b/cli/src/tests/fixtures.rs @@ -1,7 +1,7 @@ use crate::loader::Loader; +use std::fs; use std::path::{Path, PathBuf}; use tree_sitter::Language; -use std::fs; lazy_static! { static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect(); @@ -41,11 +41,6 @@ pub fn get_test_language(name: &str, parser_code: String, path: &Path) -> Langua None }; TEST_LOADER - .load_language_from_sources( - name, - &HEADER_DIR, - &parser_c_path, - &scanner_path, - ) + .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path) .unwrap() } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index c9f1dda4..a874358a 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,3 +1,3 @@ -mod fixtures; mod corpuses; +mod fixtures; mod parser_api; diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api.rs index a399bf38..9a4ce9f1 100644 --- a/cli/src/tests/parser_api.rs +++ b/cli/src/tests/parser_api.rs @@ -1,6 +1,6 @@ use super::fixtures::get_language; use std::thread; -use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet, Language}; +use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet}; fn rust() -> Language { get_language("rust") diff --git a/cli/src/util.rs b/cli/src/util.rs index 166e54d0..b1073624 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -35,7 +35,11 @@ pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result HTML_HEADER.len() as u64 { + if cfg!(target_os = "macos") + && fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64 + { Command::new("open").arg("log.html").output().unwrap(); } } else { - eprintln!("Dot failed: {} {}", 
String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr)); + eprintln!( + "Dot failed: {} {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); } } } From 1d463522977f1e8f0590d626e174c5b0a6dc5f2b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 17:26:48 -0800 Subject: [PATCH 175/208] Fix check for APPVEYOR_REPO_TAG --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 7d4acdc7..de82a7d5 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,7 +1,7 @@ build: false install: # Terminate early unless building either a tag or a PR. - - if not defined APPVEYOR_REPO_TAG if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit + - if "%APPVEYOR_REPO_TAG%" == "false" if not "%APPVEYOR_REPO_BRANCH%" == "master" appveyor exit # Install rust - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe From 31bdf5eb97faabcf61e0f4b911b34ebaa4c319f2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 Jan 2019 09:40:09 -0800 Subject: [PATCH 176/208] Fix handling of JavaScript errors Refs #258 --- cli/src/generate/dsl.js | 3 +-- cli/src/generate/mod.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 950b2d3b..1a9bed20 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -135,7 +135,6 @@ token.immediate = function(value) { } function normalize(value) { - if (typeof value == "undefined") throw new Error("Undefined symbol"); @@ -289,7 +288,7 @@ function grammar(baseGrammar, options) { throw new Error("Grammar's conflicts must be an array of arrays of rules."); } - return conflictSet.map(symbol => symbol.name); + return conflictSet.map(symbol => normalize(symbol).name); }); } diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 062a9e6b..535f9d19 100644 --- a/cli/src/generate/mod.rs +++ 
b/cli/src/generate/mod.rs @@ -36,7 +36,7 @@ pub fn generate_parser_in_directory( ) -> Result<()> { if !properties_only { let grammar_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); - let grammar_json = load_grammar_file(&grammar_path); + let grammar_json = load_grammar_file(&grammar_path)?; let (language_name, c_code) = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; let repo_src_path = repo_path.join("src"); @@ -92,15 +92,15 @@ fn generate_parser_for_grammar_with_opts( Ok((input_grammar.name, c_code)) } -fn load_grammar_file(grammar_path: &PathBuf) -> String { +fn load_grammar_file(grammar_path: &PathBuf) -> Result { match grammar_path.extension().and_then(|e| e.to_str()) { - Some("js") => load_js_grammar_file(grammar_path), - Some("json") => fs::read_to_string(grammar_path).expect("Failed to read grammar file"), - _ => panic!("Unknown grammar file extension"), + Some("js") => Ok(load_js_grammar_file(grammar_path)?), + Some("json") => Ok(fs::read_to_string(grammar_path)?), + _ => Err(Error(format!("Unknown grammar file extension: {:?}", grammar_path))), } } -fn load_js_grammar_file(grammar_path: &PathBuf) -> String { +fn load_js_grammar_file(grammar_path: &PathBuf) -> Result { let mut node_process = Command::new("node") .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) .stdin(Stdio::piped()) @@ -123,10 +123,10 @@ fn load_js_grammar_file(grammar_path: &PathBuf) -> String { match output.status.code() { None => panic!("Node process was killed"), Some(0) => {} - Some(code) => panic!(format!("Node process exited with status {}", code)), + Some(code) => return Err(Error(format!("Node process exited with status {}", code))), } - String::from_utf8(output.stdout).expect("Got invalid UTF8 from node") + Ok(String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")) } fn ensure_file(path: &PathBuf, f: impl Fn() -> String) -> Result<()> { From ff41f05a204e6bf8679e4e490c99dd671bb79ba5 Mon Sep 17 00:00:00 2001 
From: Max Brunsfeld Date: Fri, 18 Jan 2019 15:13:13 -0800 Subject: [PATCH 177/208] Fix computation of following tokens --- .../build_tables/build_parse_table.rs | 61 ++++++++++++------- cli/src/generate/build_tables/item.rs | 26 ++++++-- .../generate/build_tables/item_set_builder.rs | 40 +++++++++++- .../generate/build_tables/token_conflicts.rs | 32 ++++++++-- cli/src/generate/grammars.rs | 2 +- 5 files changed, 125 insertions(+), 36 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index bd790b29..5351f72e 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -41,12 +41,11 @@ struct ParseTableBuilder<'a> { item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, parse_table: ParseTable, - following_tokens: Vec, state_ids_to_log: Vec, } impl<'a> ParseTableBuilder<'a> { - fn build(mut self) -> Result<(ParseTable, Vec)> { + fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. 
self.parse_table.alias_sequences.push(Vec::new()); @@ -99,7 +98,7 @@ impl<'a> ParseTableBuilder<'a> { self.remove_precedences(); - Ok((self.parse_table, self.following_tokens)) + Ok(self.parse_table) } fn add_parse_state( @@ -108,20 +107,6 @@ impl<'a> ParseTableBuilder<'a> { preceding_auxiliary_symbols: &AuxiliarySymbolSequence, item_set: ParseItemSet<'a>, ) -> ParseStateId { - if preceding_symbols.len() > 1 { - let left_tokens = self - .item_set_builder - .last_set(&preceding_symbols[preceding_symbols.len() - 2]); - let right_tokens = self - .item_set_builder - .first_set(&preceding_symbols[preceding_symbols.len() - 1]); - for left_token in left_tokens.iter() { - if left_token.is_terminal() { - self.following_tokens[left_token.index].insert_all(right_tokens); - } - } - } - let mut hasher = DefaultHasher::new(); item_set.hash_unfinished_items(&mut hasher); let unfinished_item_signature = hasher.finish(); @@ -705,17 +690,50 @@ impl<'a> ParseTableBuilder<'a> { } } +fn populate_following_tokens( + result: &mut Vec, + grammar: &SyntaxGrammar, + inlines: &InlinedProductionMap, + builder: &ParseItemSetBuilder, +) { + let productions = grammar + .variables + .iter() + .flat_map(|v| &v.productions) + .chain(&inlines.productions); + for production in productions { + for i in 1..production.steps.len() { + let left_tokens = builder.last_set(&production.steps[i - 1].symbol); + let right_tokens = builder.first_set(&production.steps[i].symbol); + for left_token in left_tokens.iter() { + if left_token.is_terminal() { + result[left_token.index].insert_all_terminals(right_tokens); + } + } + } + } +} + pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, inlines: &InlinedProductionMap, state_ids_to_log: Vec, ) -> Result<(ParseTable, Vec)> { - ParseTableBuilder { + let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); + let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()]; 
+ populate_following_tokens( + &mut following_tokens, + syntax_grammar, + inlines, + &item_set_builder, + ); + + let table = ParseTableBuilder { syntax_grammar, lexical_grammar, state_ids_to_log, - item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines), + item_set_builder, state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), parse_state_queue: VecDeque::new(), @@ -725,7 +743,8 @@ pub(crate) fn build_parse_table( alias_sequences: Vec::new(), max_aliased_production_length: 0, }, - following_tokens: vec![TokenSet::new(); lexical_grammar.variables.len()], } - .build() + .build()?; + + Ok((table, following_tokens)) } diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 6c74d465..9f3307dd 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -48,7 +48,11 @@ pub(crate) struct ParseItemDisplay<'a>( pub &'a LexicalGrammar, ); -pub(crate) struct TokenSetDisplay<'a>(&'a TokenSet, &'a SyntaxGrammar, &'a LexicalGrammar); +pub(crate) struct TokenSetDisplay<'a>( + pub &'a TokenSet, + pub &'a SyntaxGrammar, + pub &'a LexicalGrammar, +); #[allow(dead_code)] pub(crate) struct ParseItemSetDisplay<'a>( @@ -134,30 +138,42 @@ impl TokenSet { vec.set(other.index, true); } - pub fn insert_all(&mut self, other: &TokenSet) -> bool { + pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool { let mut result = false; if other.terminal_bits.len() > self.terminal_bits.len() { self.terminal_bits.resize(other.terminal_bits.len(), false); } - if other.external_bits.len() > self.external_bits.len() { - self.external_bits.resize(other.external_bits.len(), false); - } for (i, element) in other.terminal_bits.iter().enumerate() { if element { result |= !self.terminal_bits[i]; self.terminal_bits.set(i, element); } } + result + } + + fn insert_all_externals(&mut self, other: &TokenSet) -> bool { + let mut result = false; + if other.external_bits.len() > 
self.external_bits.len() { + self.external_bits.resize(other.external_bits.len(), false); + } for (i, element) in other.external_bits.iter().enumerate() { if element { result |= !self.external_bits[i]; self.external_bits.set(i, element); } } + result + } + + pub fn insert_all(&mut self, other: &TokenSet) -> bool { + let mut result = false; if other.eof { result |= !self.eof; self.eof = true; } + result |= self.insert_all_terminals(other); + result |= self.insert_all_externals(other); result } } diff --git a/cli/src/generate/build_tables/item_set_builder.rs b/cli/src/generate/build_tables/item_set_builder.rs index b941b179..9a929f05 100644 --- a/cli/src/generate/build_tables/item_set_builder.rs +++ b/cli/src/generate/build_tables/item_set_builder.rs @@ -1,6 +1,6 @@ -use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet}; +use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSet, TokenSetDisplay}; use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::Symbol; +use crate::generate::rules::{Symbol, SymbolType}; use hashbrown::{HashMap, HashSet}; use std::fmt; @@ -268,7 +268,7 @@ impl<'a> ParseItemSetBuilder<'a> { } pub fn last_set(&self, symbol: &Symbol) -> &TokenSet { - &self.first_sets[symbol] + &self.last_sets[symbol] } fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) { @@ -300,6 +300,40 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "ParseItemSetBuilder {{\n")?; + write!(f, " first_sets: {{\n")?; + for (symbol, first_set) in &self.first_sets { + let name = match symbol.kind { + SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, + SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name, + SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, + SymbolType::End => "END", + }; + write!( + f, + " 
first({:?}): {}\n", + name, + TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar) + )?; + } + write!(f, " }}\n")?; + + write!(f, " last_sets: {{\n")?; + for (symbol, last_set) in &self.last_sets { + let name = match symbol.kind { + SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, + SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name, + SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, + SymbolType::End => "END", + }; + write!( + f, + " last({:?}): {}\n", + name, + TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar) + )?; + } + write!(f, " }}\n")?; + write!(f, " additions: {{\n")?; for (i, variable) in self.syntax_grammar.variables.iter().enumerate() { write!(f, " {}: {{\n", variable.name)?; diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 1f89022a..1c4fc753 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -1,5 +1,5 @@ -use crate::generate::build_tables::item::TokenSet; -use crate::generate::grammars::LexicalGrammar; +use crate::generate::build_tables::item::{TokenSet, TokenSetDisplay}; +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; use hashbrown::HashSet; use std::cmp::Ordering; @@ -16,6 +16,7 @@ struct TokenConflictStatus { pub(crate) struct TokenConflictMap<'a> { n: usize, status_matrix: Vec, + following_tokens: Vec, starting_chars_by_index: Vec, following_chars_by_index: Vec, grammar: &'a LexicalGrammar, @@ -25,7 +26,7 @@ impl<'a> TokenConflictMap<'a> { pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec) -> Self { let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new()); let starting_chars = get_starting_chars(&mut cursor, grammar); - let following_chars = get_following_chars(&starting_chars, 
following_tokens); + let following_chars = get_following_chars(&starting_chars, &following_tokens); let n = grammar.variables.len(); let mut status_matrix = vec![TokenConflictStatus::default(); n * n]; @@ -40,6 +41,7 @@ impl<'a> TokenConflictMap<'a> { TokenConflictMap { n, status_matrix, + following_tokens, starting_chars_by_index: starting_chars, following_chars_by_index: following_chars, grammar, @@ -115,9 +117,27 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "TokenConflictMap {{\n")?; + let syntax_grammar = SyntaxGrammar::default(); + + write!(f, " following_tokens: {{\n")?; + for (i, following_tokens) in self.following_tokens.iter().enumerate() { + write!( + f, + " follow({:?}): {},\n", + self.grammar.variables[i].name, + TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar) + )?; + } + write!(f, " }},\n")?; + write!(f, " starting_characters: {{\n")?; for i in 0..self.n { - write!(f, " {}: {:?},\n", i, self.starting_chars_by_index[i])?; + write!( + f, + " {:?}: {:?},\n", + self.grammar.variables[i].name, + self.starting_chars_by_index[i] + )?; } write!(f, " }},\n")?; @@ -169,10 +189,10 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec, - following_tokens: Vec, + following_tokens: &Vec, ) -> Vec { following_tokens - .into_iter() + .iter() .map(|following_tokens| { let mut chars = CharacterSet::empty(); for token in following_tokens.iter() { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 3cedcd42..c9282da3 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -81,7 +81,7 @@ pub(crate) struct ExternalToken { pub corresponding_internal_token: Option, } -#[derive(Debug)] +#[derive(Debug, Default)] pub(crate) struct SyntaxGrammar { pub variables: Vec, pub extra_tokens: Vec, From 9e610bf88e937daf6a3b3768ec1224b516dd3bd9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 18 Jan 2019 15:16:51 
-0800 Subject: [PATCH 178/208] 0.14.0-beta3 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package-lock.json | 2 +- cli/npm/package.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f27e897e..003978c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -567,7 +567,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.14.0-beta2" +version = "0.14.0-beta3" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index b7cd21d2..d8e50bbf 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tree-sitter-cli" -version = "0.14.0-beta2" +version = "0.14.0-beta3" authors = ["Max Brunsfeld "] edition = "2018" diff --git a/cli/npm/package-lock.json b/cli/npm/package-lock.json index ff76b456..4590ac72 100644 --- a/cli/npm/package-lock.json +++ b/cli/npm/package-lock.json @@ -1,5 +1,5 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta2", + "version": "0.14.0-beta3", "lockfileVersion": 1 } diff --git a/cli/npm/package.json b/cli/npm/package.json index 9dfd5e7e..276ea9d8 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta2", + "version": "0.14.0-beta3", "author": "Max Brunsfeld", "license": "MIT", "repository": { From f6cdd5e3d4817c6dc0fffaeb0c50574171d80309 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 20 Jan 2019 16:58:31 -0800 Subject: [PATCH 179/208] Loosen criteria for identifying conflict-free tokens for error recovery --- cli/src/generate/build_tables/mod.rs | 4 +- .../generate/build_tables/token_conflicts.rs | 82 ++++++++++++++++--- 2 files changed, 74 insertions(+), 12 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 92fddefe..ca54f274 100644 --- 
a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -86,14 +86,14 @@ fn populate_error_state( let n = lexical_grammar.variables.len(); // First identify the *conflict-free tokens*: tokens that do not overlap with - // any other token in any way. + // any other token in any way, besides matching exactly the same string. let conflict_free_tokens: TokenSet = (0..n) .into_iter() .filter_map(|i| { let conflicts_with_other_tokens = (0..n).into_iter().any(|j| { j != i && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) - && token_conflict_map.does_conflict(i, j) + && token_conflict_map.does_match_shorter_or_longer(i, j) }); if conflicts_with_other_tokens { None diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 1c4fc753..5c8b3ff5 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -65,6 +65,13 @@ impl<'a> TokenConflictMap<'a> { || entry.matches_same_string } + pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool { + let entry = &self.status_matrix[matrix_index(self.n, i, j)]; + let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)]; + (entry.does_match_valid_continuation || entry.does_match_separators) + && !reverse_entry.does_match_separators + } + pub fn does_overlap(&self, i: usize, j: usize) -> bool { self.status_matrix[matrix_index(self.n, i, j)].does_overlap } @@ -135,8 +142,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { write!( f, " {:?}: {:?},\n", - self.grammar.variables[i].name, - self.starting_chars_by_index[i] + self.grammar.variables[i].name, self.starting_chars_by_index[i] )?; } write!(f, " }},\n")?; @@ -230,8 +236,18 @@ fn compute_conflict_status( continue; } + let has_sep = cursor.transition_chars().any(|(_, sep)| sep); + let mut completion = None; for (id, precedence) in cursor.completions() { + if has_sep { + if id == i { + 
result.0.does_match_separators = true; + } else { + result.1.does_match_separators = true; + } + } + if let Some((prev_id, prev_precedence)) = completion { if id == prev_id { continue; @@ -263,8 +279,6 @@ fn compute_conflict_status( } } - let has_sep = cursor.transition_chars().any(|(_, sep)| sep); - for transition in cursor.transitions() { let mut can_advance = true; if let Some((completed_id, completed_precedence)) = completion { @@ -298,17 +312,11 @@ fn compute_conflict_status( if transition.characters.does_intersect(&following_chars[j]) { result.0.does_match_valid_continuation = true; } - if transition.is_separator || has_sep { - result.0.does_match_separators = true; - } } else { result.1.does_overlap = true; if transition.characters.does_intersect(&following_chars[i]) { result.1.does_match_valid_continuation = true; } - if transition.is_separator || has_sep { - result.1.does_match_separators = true; - } } } } @@ -414,6 +422,60 @@ mod tests { assert!(token_map.does_conflict(var("instanceof"), var("in"))); } + #[test] + fn test_token_conflicts_with_separators() { + let grammar = expand_tokens(ExtractedLexicalGrammar { + separators: vec![Rule::pattern("\\s")], + variables: vec![ + Variable { + name: "x".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + Variable { + name: "newline".to_string(), + kind: VariableType::Named, + rule: Rule::string("\n"), + }, + ], + }) + .unwrap(); + + let var = |name| index_of_var(&grammar, name); + + let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]); + + assert!(token_map.does_conflict(var("newline"), var("x"))); + assert!(!token_map.does_conflict(var("x"), var("newline"))); + } + + #[test] + fn test_token_conflicts_with_open_ended_tokens() { + let grammar = expand_tokens(ExtractedLexicalGrammar { + separators: vec![Rule::pattern("\\s")], + variables: vec![ + Variable { + name: "x".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + Variable { + name: 
"anything".to_string(), + kind: VariableType::Named, + rule: Rule::prec(-1, Rule::pattern(".*")), + }, + ], + }) + .unwrap(); + + let var = |name| index_of_var(&grammar, name); + + let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]); + + assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x"))); + assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything"))); + } + fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize { grammar .variables From 6105bf990937c282898cba2e03c69728f924f4c8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 20 Jan 2019 16:58:49 -0800 Subject: [PATCH 180/208] Include error recovery examples in test suite --- .gitignore | 2 +- cli/src/tests/corpuses.rs | 80 ++++++++++++------- cli/src/util.rs | 2 +- .../error_corpus/javascript_errors.txt | 2 +- 4 files changed, 52 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index bcb55844..360390b1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -log.html +log*.html .idea *.xcodeproj diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index b28b2510..6d46aacb 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -28,21 +28,9 @@ lazy_static! 
{ #[test] fn test_real_language_corpus_files() { let mut log_session = None; - let mut parser = Parser::new(); + let mut parser = get_parser(&mut log_session, "log1.html"); let grammars_dir = fixtures_dir().join("grammars"); - if *LOG_ENABLED { - parser.set_logger(Some(Box::new(|log_type, msg| { - if log_type == LogType::Lex { - eprintln!(" {}", msg); - } else { - eprintln!("{}", msg); - } - }))); - } else if *LOG_GRAPH_ENABLED { - log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); - } - let mut did_fail = false; for language_name in LANGUAGES.iter().cloned() { if let Some(filter) = LANGUAGE_FILTER.as_ref() { @@ -60,8 +48,35 @@ fn test_real_language_corpus_files() { did_fail |= run_mutation_tests(&mut parser, test); } - drop(parser); - drop(log_session); + if did_fail { + panic!("Corpus tests failed"); + } +} + +#[test] +fn test_error_corpus_files() { + let mut log_session = None; + let mut parser = get_parser(&mut log_session, "log2.html"); + let corpus_dir = fixtures_dir().join("error_corpus"); + + let mut did_fail = false; + for entry in fs::read_dir(&corpus_dir).unwrap() { + let entry = entry.unwrap(); + let language_name = entry.file_name(); + let language_name = language_name.to_str().unwrap().replace("_errors.txt", ""); + if let Some(filter) = LANGUAGE_FILTER.as_ref() { + if language_name != filter.as_str() { + continue; + } + } + + eprintln!("language: {:?}", language_name); + + let test = parse_tests(&entry.path()).unwrap(); + let language = get_language(&language_name); + parser.set_language(language).unwrap(); + did_fail |= run_mutation_tests(&mut parser, test); + } if did_fail { panic!("Corpus tests failed"); @@ -71,21 +86,9 @@ fn test_real_language_corpus_files() { #[test] fn test_feature_corpus_files() { let mut log_session = None; - let mut parser = Parser::new(); + let mut parser = get_parser(&mut log_session, "log3.html"); let test_grammars_dir = fixtures_dir().join("test_grammars"); - if *LOG_ENABLED { - 
parser.set_logger(Some(Box::new(|log_type, msg| { - if log_type == LogType::Lex { - eprintln!(" {}", msg); - } else { - eprintln!("{}", msg); - } - }))); - } else if *LOG_GRAPH_ENABLED { - log_session = Some(util::log_graphs(&mut parser, "log.html").unwrap()); - } - let mut did_fail = false; for entry in fs::read_dir(&test_grammars_dir).unwrap() { let entry = entry.unwrap(); @@ -134,9 +137,6 @@ fn test_feature_corpus_files() { } } - drop(parser); - drop(log_session); - if did_fail { panic!("Corpus tests failed"); } @@ -179,3 +179,21 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) -> bool { } } } + +fn get_parser(session: &mut Option, log_filename: &str) -> Parser { + let mut parser = Parser::new(); + + if *LOG_ENABLED { + parser.set_logger(Some(Box::new(|log_type, msg| { + if log_type == LogType::Lex { + eprintln!(" {}", msg); + } else { + eprintln!("{}", msg); + } + }))); + } else if *LOG_GRAPH_ENABLED { + *session = Some(util::log_graphs(&mut parser, log_filename).unwrap()); + } + + parser +} diff --git a/cli/src/util.rs b/cli/src/util.rs index b1073624..004d3b06 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -53,7 +53,7 @@ impl Drop for LogSession { if cfg!(target_os = "macos") && fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64 { - Command::new("open").arg("log.html").output().unwrap(); + Command::new("open").arg(&self.0).output().unwrap(); } } else { eprintln!( diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 1717b85d..ffa9d547 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -36,7 +36,7 @@ Missing object-literal values (program (expression_statement (object (pair (property_identifier) (identifier)) - (pair (property_identifier) (yield_expression (MISSING)))))) + (pair (property_identifier) (MISSING))))) =================================================== Extra identifiers in 
expressions From 196339aaa9aad0cf9bdc4ef381f008c7c1651c54 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Jan 2019 14:22:35 -0800 Subject: [PATCH 181/208] Assert no memory leaks by stubbing malloc/free in the test suite --- Cargo.lock | 7 +++ cli/Cargo.toml | 3 + cli/src/tests/allocations.rs | 104 +++++++++++++++++++++++++++++++++++ cli/src/tests/corpuses.rs | 29 +++++----- cli/src/tests/mod.rs | 1 + lib/build.rs | 18 +++++- lib/src/subtree.c | 2 +- 7 files changed, 146 insertions(+), 18 deletions(-) create mode 100644 cli/src/tests/allocations.rs diff --git a/Cargo.lock b/Cargo.lock index 003978c1..936c60ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -502,6 +502,11 @@ name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "spin" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "strsim" version = "0.7.0" @@ -585,6 +590,7 @@ dependencies = [ "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.3.5", ] @@ -702,6 +708,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" "checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" "checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" +"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index d8e50bbf..5eb92079 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -34,3 +34,6 @@ features = ["preserve_order"] [dependencies.log] version = "0.4.6" features = ["std"] + +[dev-dependencies] +spin = "0.5" diff --git a/cli/src/tests/allocations.rs b/cli/src/tests/allocations.rs new file mode 100644 index 00000000..c4a3dbac --- /dev/null +++ b/cli/src/tests/allocations.rs @@ -0,0 +1,104 @@ +#![cfg(test)] +#![allow(dead_code)] + +use spin::Mutex; +use std::collections::HashMap; +use std::os::raw::{c_ulong, c_void}; + +#[derive(Debug, PartialEq, Eq, Hash)] +struct Allocation(*const c_void); +unsafe impl Send for Allocation {} +unsafe impl Sync for Allocation {} + +#[derive(Default)] +struct AllocationRecorder { + enabled: bool, + allocation_count: u64, + outstanding_allocations: HashMap, +} + +lazy_static! 
{ + static ref RECORDER: Mutex = Mutex::new(AllocationRecorder::default()); +} + +extern "C" { + fn malloc(size: c_ulong) -> *mut c_void; + fn calloc(count: c_ulong, size: c_ulong) -> *mut c_void; + fn realloc(ptr: *mut c_void, size: c_ulong) -> *mut c_void; + fn free(ptr: *mut c_void); +} + +pub fn start_recording() { + let mut recorder = RECORDER.lock(); + recorder.enabled = true; + recorder.allocation_count = 0; + recorder.outstanding_allocations.clear(); +} + +pub fn stop_recording() { + let mut recorder = RECORDER.lock(); + recorder.enabled = false; + + if !recorder.outstanding_allocations.is_empty() { + panic!( + "Leaked allocation indices: {:?}", + recorder + .outstanding_allocations + .iter() + .map(|e| e.1) + .collect::>() + ); + } +} + +fn record_alloc(ptr: *mut c_void) { + let mut recorder = RECORDER.lock(); + if recorder.enabled { + let count = recorder.allocation_count; + recorder.allocation_count += 1; + recorder + .outstanding_allocations + .insert(Allocation(ptr), count); + } +} + +fn record_dealloc(ptr: *mut c_void) { + let mut recorder = RECORDER.lock(); + if recorder.enabled { + recorder.outstanding_allocations.remove(&Allocation(ptr)); + } +} + +#[no_mangle] +extern "C" fn ts_record_malloc(size: c_ulong) -> *const c_void { + let result = unsafe { malloc(size) }; + record_alloc(result); + result +} + +#[no_mangle] +extern "C" fn ts_record_calloc(count: c_ulong, size: c_ulong) -> *const c_void { + let result = unsafe { calloc(count, size) }; + record_alloc(result); + result +} + +#[no_mangle] +extern "C" fn ts_record_realloc(ptr: *mut c_void, size: c_ulong) -> *const c_void { + record_dealloc(ptr); + let result = unsafe { realloc(ptr, size) }; + record_alloc(result); + result +} + +#[no_mangle] +extern "C" fn ts_record_free(ptr: *mut c_void) { + record_dealloc(ptr); + unsafe { free(ptr) }; +} + +#[no_mangle] +extern "C" fn ts_record_allocations_toggle() { + let mut recorder = RECORDER.lock(); + recorder.enabled = !recorder.enabled; +} diff --git 
a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index 6d46aacb..2c205d40 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -1,9 +1,10 @@ +use super::allocations; use super::fixtures::{fixtures_dir, get_language, get_test_language}; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; use std::fs; -use tree_sitter::{LogType, Parser}; +use tree_sitter::{Language, LogType, Parser}; const LANGUAGES: &'static [&'static str] = &[ "bash", @@ -27,8 +28,6 @@ lazy_static! { #[test] fn test_real_language_corpus_files() { - let mut log_session = None; - let mut parser = get_parser(&mut log_session, "log1.html"); let grammars_dir = fixtures_dir().join("grammars"); let mut did_fail = false; @@ -44,8 +43,7 @@ fn test_real_language_corpus_files() { let language = get_language(language_name); let corpus_dir = grammars_dir.join(language_name).join("corpus"); let test = parse_tests(&corpus_dir).unwrap(); - parser.set_language(language).unwrap(); - did_fail |= run_mutation_tests(&mut parser, test); + did_fail |= run_mutation_tests(language, test); } if did_fail { @@ -55,8 +53,6 @@ fn test_real_language_corpus_files() { #[test] fn test_error_corpus_files() { - let mut log_session = None; - let mut parser = get_parser(&mut log_session, "log2.html"); let corpus_dir = fixtures_dir().join("error_corpus"); let mut did_fail = false; @@ -74,8 +70,7 @@ fn test_error_corpus_files() { let test = parse_tests(&entry.path()).unwrap(); let language = get_language(&language_name); - parser.set_language(language).unwrap(); - did_fail |= run_mutation_tests(&mut parser, test); + did_fail |= run_mutation_tests(language, test); } if did_fail { @@ -85,8 +80,6 @@ fn test_error_corpus_files() { #[test] fn test_feature_corpus_files() { - let mut log_session = None; - let mut parser = get_parser(&mut log_session, "log3.html"); let test_grammars_dir = fixtures_dir().join("test_grammars"); let mut did_fail = false; @@ -132,8 
+125,7 @@ fn test_feature_corpus_files() { let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, c_code, &test_path); let test = parse_tests(&corpus_path).unwrap(); - parser.set_language(language).unwrap(); - did_fail |= run_mutation_tests(&mut parser, test); + did_fail |= run_mutation_tests(language, test); } } @@ -142,7 +134,7 @@ fn test_feature_corpus_files() { } } -fn run_mutation_tests(parser: &mut Parser, test: TestEntry) -> bool { +fn run_mutation_tests(language: Language, test: TestEntry) -> bool { match test { TestEntry::Example { name, @@ -157,23 +149,30 @@ fn run_mutation_tests(parser: &mut Parser, test: TestEntry) -> bool { eprintln!(" example: {:?}", name); + allocations::start_recording(); + let mut log_session = None; + let mut parser = get_parser(&mut log_session, "log.html"); + parser.set_language(language).unwrap(); let tree = parser .parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) .unwrap(); let actual = tree.root_node().to_sexp(); + drop(tree); + drop(parser); if actual != output { print_diff_key(); print_diff(&actual, &output); println!(""); true } else { + allocations::stop_recording(); false } } TestEntry::Group { children, .. 
} => { let mut result = false; for child in children { - result |= run_mutation_tests(parser, child); + result |= run_mutation_tests(language, child); } result } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index a874358a..174be67b 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,3 +1,4 @@ +mod allocations; mod corpuses; mod fixtures; mod parser_api; diff --git a/lib/build.rs b/lib/build.rs index 2a121001..df66ee7c 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -1,6 +1,6 @@ extern crate cc; -use std::env; +use std::{env, fs}; use std::path::{Path, PathBuf}; fn main() { @@ -20,13 +20,27 @@ fn main() { } let mut config = cc::Build::new(); + + println!("cargo:rerun-if-env-changed=TREE_SITTER_TEST"); + if env::var("TREE_SITTER_TEST").is_ok() { + config.define("TREE_SITTER_TEST", ""); + } + + let src_path = Path::new("src"); + + for entry in fs::read_dir(&src_path).unwrap() { + let entry = entry.unwrap(); + let path = src_path.join(entry.file_name()); + println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); + } + config .define("UTF8PROC_STATIC", "") .flag_if_supported("-std=c99") .flag_if_supported("-Wno-unused-parameter") .include("include") .include("utf8proc") - .file(Path::new("src").join("lib.c")) + .file(src_path.join("lib.c")) .compile("tree-sitter"); } diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 48c8cff3..3e353f99 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -855,7 +855,7 @@ char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_a language, true, include_all, 0, false ) + 1; - char *result = ts_malloc(size * sizeof(char)); + char *result = malloc(size * sizeof(char)); ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false); return result; } From e305012b3107ba48fa9d75be4a8fa40c3adf5458 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Jan 2019 15:33:43 -0800 Subject: [PATCH 182/208] Loosen keyword identification criteria slightly --- 
cli/src/generate/build_tables/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index ca54f274..36f6770b 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -216,7 +216,7 @@ fn identify_keywords( // First find all of the candidate keyword tokens: tokens that start with // letters or underscore and can match the same string as a word token. - let keywords: TokenSet = lexical_grammar + let keyword_candidates: TokenSet = lexical_grammar .variables .iter() .enumerate() @@ -237,10 +237,10 @@ fn identify_keywords( .collect(); // Exclude keyword candidates that shadow another keyword candidate. - let keywords: TokenSet = keywords + let keywords: TokenSet = keyword_candidates .iter() .filter(|token| { - for other_token in keywords.iter() { + for other_token in keyword_candidates.iter() { if other_token != *token && token_conflict_map.does_match_same_string(other_token.index, token.index) { @@ -262,7 +262,7 @@ fn identify_keywords( .iter() .filter(|token| { for other_index in 0..lexical_grammar.variables.len() { - if keywords.contains(&Symbol::terminal(other_index)) { + if keyword_candidates.contains(&Symbol::terminal(other_index)) { continue; } From 233d616ebfaca1dd354accdb95df32444a4eeef0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 25 Jan 2019 12:05:21 -0800 Subject: [PATCH 183/208] Add random mutation tests --- Cargo.lock | 1 + cli/Cargo.toml | 1 + cli/src/test.rs | 8 +- cli/src/tests/allocations.rs | 12 +- cli/src/tests/corpuses.rs | 386 +++++++++++++++++++++++++++-------- cli/src/tests/mod.rs | 1 + cli/src/tests/parser_api.rs | 8 +- cli/src/tests/random.rs | 41 ++++ lib/binding/lib.rs | 32 ++- lib/src/parser.c | 29 ++- lib/src/subtree.c | 8 +- script/test | 43 ++-- 12 files changed, 443 insertions(+), 127 deletions(-) create mode 100644 cli/src/tests/random.rs diff --git a/Cargo.lock b/Cargo.lock index 
936c60ee..5c2dcd62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -583,6 +583,7 @@ dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5eb92079..75efdb18 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -36,4 +36,5 @@ version = "0.4.6" features = ["std"] [dev-dependencies] +rand = "0.6.4" spin = "0.5" diff --git a/cli/src/test.rs b/cli/src/test.rs index 3a40eb83..b8b78b8f 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -36,6 +36,12 @@ pub enum TestEntry { }, } +impl Default for TestEntry { + fn default() -> Self { + TestEntry::Group { name: String::new(), children: Vec::new() } + } +} + pub fn run_tests_at_path( language: Language, path: &Path, @@ -160,7 +166,7 @@ fn run_tests( pub fn parse_tests(path: &Path) -> io::Result { let name = path - .file_name() + .file_stem() .and_then(|s| s.to_str()) .unwrap_or("") .to_string(); diff --git a/cli/src/tests/allocations.rs b/cli/src/tests/allocations.rs index c4a3dbac..e3cdae27 100644 --- a/cli/src/tests/allocations.rs +++ b/cli/src/tests/allocations.rs @@ -40,13 +40,15 @@ pub fn stop_recording() { recorder.enabled = false; if !recorder.outstanding_allocations.is_empty() { + let mut allocation_indices = recorder + .outstanding_allocations + .iter() + .map(|e| e.1) + .collect::>(); + allocation_indices.sort_unstable(); panic!( "Leaked allocation indices: {:?}", - recorder - .outstanding_allocations - .iter() - .map(|e| e.1) - .collect::>() + allocation_indices ); } } diff --git 
a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index 2c205d40..76ed02d0 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpuses.rs @@ -1,11 +1,14 @@ use super::allocations; use super::fixtures::{fixtures_dir, get_language, get_test_language}; +use super::random::Rand; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; -use std::fs; -use tree_sitter::{Language, LogType, Parser}; +use std::{env, fs, time, usize}; +use tree_sitter::{InputEdit, LogType, Parser, Point, Tree}; +const EDIT_COUNT: usize = 3; +const TRIAL_COUNT: usize = 10; const LANGUAGES: &'static [&'static str] = &[ "bash", "c", @@ -18,19 +21,30 @@ const LANGUAGES: &'static [&'static str] = &[ ]; lazy_static! { - static ref LANGUAGE_FILTER: Option = - std::env::var("TREE_SITTER_TEST_LANGUAGE_FILTER").ok(); - static ref EXAMPLE_FILTER: Option = - std::env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); - static ref LOG_ENABLED: bool = std::env::var("TREE_SITTER_ENABLE_LOG").is_ok(); - static ref LOG_GRAPH_ENABLED: bool = std::env::var("TREE_SITTER_ENABLE_LOG_GRAPHS").is_ok(); + static ref LOG_ENABLED: bool = env::var("TREE_SITTER_TEST_ENABLE_LOG").is_ok(); + static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_TEST_ENABLE_LOG_GRAPHS").is_ok(); + static ref LANGUAGE_FILTER: Option = env::var("TREE_SITTER_TEST_LANGUAGE_FILTER").ok(); + static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); + static ref TRIAL_FILTER: Option = env::var("TREE_SITTER_TEST_TRIAL_FILTER") + .map(|s| usize::from_str_radix(&s, 10).unwrap()) + .ok(); + pub static ref SEED: usize = env::var("TREE_SITTER_TEST_SEED") + .map(|s| usize::from_str_radix(&s, 10).unwrap()) + .unwrap_or( + time::SystemTime::now() + .duration_since(time::UNIX_EPOCH) + .unwrap() + .as_secs() as usize, + ); } #[test] fn test_real_language_corpus_files() { + eprintln!("\n\nRandom seed: {}\n", *SEED); let grammars_dir = fixtures_dir().join("grammars"); 
+ let error_corpus_dir = fixtures_dir().join("error_corpus"); - let mut did_fail = false; + let mut failure_count = 0; for language_name in LANGUAGES.iter().cloned() { if let Some(filter) = LANGUAGE_FILTER.as_ref() { if language_name != filter.as_str() { @@ -38,51 +52,133 @@ fn test_real_language_corpus_files() { } } - eprintln!("language: {:?}", language_name); - let language = get_language(language_name); let corpus_dir = grammars_dir.join(language_name).join("corpus"); - let test = parse_tests(&corpus_dir).unwrap(); - did_fail |= run_mutation_tests(language, test); - } + let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name)); + let main_tests = parse_tests(&corpus_dir).unwrap(); + let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default()); + let mut tests = flatten_tests(main_tests); + tests.extend(flatten_tests(error_tests)); - if did_fail { - panic!("Corpus tests failed"); + if !tests.is_empty() { + eprintln!("language: {:?}", language_name); + } + + for (example_name, input, expected_output) in tests { + eprintln!(" example: {:?}", example_name); + + if TRIAL_FILTER.map_or(true, |t| t == 0) { + allocations::start_recording(); + let mut log_session = None; + let mut parser = get_parser(&mut log_session, "log.html"); + parser.set_language(language).unwrap(); + let tree = parser.parse_utf8(&mut |i, _| &input[i..], None).unwrap(); + let actual_output = tree.root_node().to_sexp(); + drop(tree); + drop(parser); + if actual_output != expected_output { + print_diff_key(); + print_diff(&actual_output, &expected_output); + println!(""); + failure_count += 1; + continue; + } + allocations::stop_recording(); + } + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser + .parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), None) + .unwrap(); + drop(parser); + + for trial in 1..=TRIAL_COUNT { + if TRIAL_FILTER.map_or(true, |filter| filter == trial) { + let mut rand = 
Rand::new(*SEED + trial); + + allocations::start_recording(); + let mut log_session = None; + let mut parser = get_parser(&mut log_session, "log.html"); + parser.set_language(language).unwrap(); + let mut tree = tree.clone(); + let mut input = input.clone(); + + if *LOG_GRAPH_ENABLED { + eprintln!("{}\n", String::from_utf8_lossy(&input)); + } + + // Perform a random series of edits and reparse. + let mut undo_stack = Vec::new(); + for _ in 0..EDIT_COUNT { + let edit = get_random_edit(&mut rand, &input); + undo_stack.push(invert_edit(&input, &edit)); + perform_edit(&mut tree, &mut input, &edit); + } + if *LOG_GRAPH_ENABLED { + eprintln!("{}\n", String::from_utf8_lossy(&input)); + } + + let mut tree2 = parser + .parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree)) + .unwrap(); + + // Check that the new tree is consistent. + check_consistent_sizes(&tree2, &input); + check_changed_ranges(&tree, &tree2, &input); + + // Undo all of the edits and re-parse again. + while let Some(edit) = undo_stack.pop() { + perform_edit(&mut tree2, &mut input, &edit); + } + if *LOG_GRAPH_ENABLED { + eprintln!("{}\n", String::from_utf8_lossy(&input)); + } + + let tree3 = parser + .parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree2)) + .unwrap(); + + // Check that the edited tree is consistent. + check_consistent_sizes(&tree3, &input); + check_changed_ranges(&tree2, &tree3, &input); + + // Verify that the final tree matches the expectation from the corpus. 
+ let actual_output = tree3.root_node().to_sexp(); + if actual_output != expected_output { + println!("Incorrect parse for {} - {} - trial {}", language_name, example_name, trial); + print_diff_key(); + print_diff(&actual_output, &expected_output); + println!(""); + failure_count += 1; + // break; + } + + drop(tree); + drop(tree2); + drop(tree3); + drop(parser); + allocations::stop_recording(); + } + } + } + } + if failure_count > 0 { + panic!("{} corpus tests failed", failure_count); } } -#[test] -fn test_error_corpus_files() { - let corpus_dir = fixtures_dir().join("error_corpus"); - - let mut did_fail = false; - for entry in fs::read_dir(&corpus_dir).unwrap() { - let entry = entry.unwrap(); - let language_name = entry.file_name(); - let language_name = language_name.to_str().unwrap().replace("_errors.txt", ""); - if let Some(filter) = LANGUAGE_FILTER.as_ref() { - if language_name != filter.as_str() { - continue; - } - } - - eprintln!("language: {:?}", language_name); - - let test = parse_tests(&entry.path()).unwrap(); - let language = get_language(&language_name); - did_fail |= run_mutation_tests(language, test); - } - - if did_fail { - panic!("Corpus tests failed"); - } +struct Edit { + position: usize, + deleted_length: usize, + inserted_text: Vec, } #[test] fn test_feature_corpus_files() { let test_grammars_dir = fixtures_dir().join("test_grammars"); - let mut did_fail = false; + let mut failure_count = 0; for entry in fs::read_dir(&test_grammars_dir).unwrap() { let entry = entry.unwrap(); if !entry.metadata().unwrap().is_dir() { @@ -97,8 +193,6 @@ fn test_feature_corpus_files() { } } - eprintln!("test language: {:?}", language_name); - let test_path = entry.path(); let grammar_path = test_path.join("grammar.json"); let error_message_path = test_path.join("expected_error.txt"); @@ -106,79 +200,156 @@ fn test_feature_corpus_files() { let generate_result = generate::generate_parser_for_grammar(&grammar_json); if error_message_path.exists() { + if 
EXAMPLE_FILTER.is_some() { + continue; + } + + eprintln!("test language: {:?}", language_name); + let expected_message = fs::read_to_string(&error_message_path).unwrap(); if let Err(e) = generate_result { if e.0 != expected_message { - panic!( + eprintln!( "Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n", expected_message, e.0 ); + failure_count += 1; } } else { - panic!( + eprintln!( "Expected error message but got none for test grammar '{}'", language_name ); + failure_count += 1; } } else { let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, c_code, &test_path); let test = parse_tests(&corpus_path).unwrap(); - did_fail |= run_mutation_tests(language, test); - } - } + let tests = flatten_tests(test); - if did_fail { - panic!("Corpus tests failed"); - } -} + if !tests.is_empty() { + eprintln!("test language: {:?}", language_name); + } -fn run_mutation_tests(language: Language, test: TestEntry) -> bool { - match test { - TestEntry::Example { - name, - input, - output, - } => { - if let Some(filter) = EXAMPLE_FILTER.as_ref() { - if !name.contains(filter.as_str()) { - return false; + for (name, input, expected_output) in tests { + eprintln!(" example: {:?}", name); + + allocations::start_recording(); + let mut log_session = None; + let mut parser = get_parser(&mut log_session, "log.html"); + parser.set_language(language).unwrap(); + let tree = parser.parse_utf8(&mut |i, _| &input[i..], None).unwrap(); + let actual_output = tree.root_node().to_sexp(); + drop(tree); + drop(parser); + if actual_output != expected_output { + print_diff_key(); + print_diff(&actual_output, &expected_output); + println!(""); + failure_count += 1; + continue; } - } - - eprintln!(" example: {:?}", name); - - allocations::start_recording(); - let mut log_session = None; - let mut parser = get_parser(&mut log_session, "log.html"); - parser.set_language(language).unwrap(); - let tree = parser - 
.parse_utf8(&mut |byte_offset, _| &input[byte_offset..], None) - .unwrap(); - let actual = tree.root_node().to_sexp(); - drop(tree); - drop(parser); - if actual != output { - print_diff_key(); - print_diff(&actual, &output); - println!(""); - true - } else { allocations::stop_recording(); - false } } - TestEntry::Group { children, .. } => { - let mut result = false; - for child in children { - result |= run_mutation_tests(language, child); - } - result + } + if failure_count > 0 { + panic!("{} corpus tests failed", failure_count); + } +} + +fn get_random_edit(rand: &mut Rand, input: &Vec) -> Edit { + let choice = rand.unsigned(10); + if choice < 2 { + // Insert text at end + let inserted_text = rand.words(3); + Edit { + position: input.len(), + deleted_length: 0, + inserted_text, + } + } else if choice < 5 { + // Delete text from the end + let mut deleted_length = rand.unsigned(10); + if deleted_length > input.len() { + deleted_length = input.len(); + } + Edit { + position: input.len() - deleted_length, + deleted_length, + inserted_text: vec![], + } + } else if choice < 8 { + // Insert at a random position + let position = rand.unsigned(input.len()); + let word_count = 1 + rand.unsigned(3); + let inserted_text = rand.words(word_count); + Edit { + position, + deleted_length: 0, + inserted_text, + } + } else { + // Replace at random position + let position = rand.unsigned(input.len()); + let deleted_length = rand.unsigned(input.len() - position); + let word_count = 1 + rand.unsigned(3); + let inserted_text = rand.words(word_count); + Edit { + position, + deleted_length, + inserted_text, } } } +fn invert_edit(input: &Vec, edit: &Edit) -> Edit { + let position = edit.position; + let removed_content = &input[position..(position + edit.deleted_length)]; + Edit { + position, + deleted_length: edit.inserted_text.len(), + inserted_text: removed_content.to_vec(), + } +} + +fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { + let start_byte = edit.position; + 
let old_end_byte = edit.position + edit.deleted_length; + let new_end_byte = edit.position + edit.inserted_text.len(); + let start_position = position_for_offset(input, start_byte); + let old_end_position = position_for_offset(input, old_end_byte); + input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); + let new_end_position = position_for_offset(input, new_end_byte); + tree.edit(&InputEdit { + start_byte, + old_end_byte, + new_end_byte, + start_position, + old_end_position, + new_end_position, + }); +} + +fn position_for_offset(input: &Vec, offset: usize) -> Point { + let mut result = Point { row: 0, column: 0 }; + for c in &input[0..offset] { + if *c as char == '\n' { + result.row += 1; + result.column = 0; + } else { + result.column += 1; + } + } + result +} + +fn check_consistent_sizes(tree: &Tree, input: &Vec) {} + +fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec) {} + fn get_parser(session: &mut Option, log_filename: &str) -> Parser { let mut parser = Parser::new(); @@ -196,3 +367,38 @@ fn get_parser(session: &mut Option, log_filename: &str) -> Par parser } + +fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String)> { + fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec, String)>) { + match test { + TestEntry::Example { + mut name, + input, + output, + } => { + if !prefix.is_empty() { + name.insert_str(0, " - "); + name.insert_str(0, prefix); + } + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !name.contains(filter.as_str()) { + return; + } + } + result.push((name, input, output)); + } + TestEntry::Group { mut name, children } => { + if !prefix.is_empty() { + name.insert_str(0, " - "); + name.insert_str(0, prefix); + } + for child in children { + helper(child, &name, result); + } + } + } + } + let mut result = Vec::new(); + helper(test, "", &mut result); + result +} diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 174be67b..3a0c607a 100644 --- a/cli/src/tests/mod.rs 
+++ b/cli/src/tests/mod.rs @@ -1,4 +1,5 @@ mod allocations; mod corpuses; mod fixtures; +mod random; mod parser_api; diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api.rs index 9a4ce9f1..38bc0b69 100644 --- a/cli/src/tests/parser_api.rs +++ b/cli/src/tests/parser_api.rs @@ -2,10 +2,6 @@ use super::fixtures::get_language; use std::thread; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet}; -fn rust() -> Language { - get_language("rust") -} - #[test] fn test_basic_parsing() { let mut parser = Parser::new(); @@ -505,3 +501,7 @@ fn test_parallel_parsing() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } + +fn rust() -> Language { + get_language("rust") +} diff --git a/cli/src/tests/random.rs b/cli/src/tests/random.rs new file mode 100644 index 00000000..3c8394e7 --- /dev/null +++ b/cli/src/tests/random.rs @@ -0,0 +1,41 @@ +use rand::distributions::Alphanumeric; +use rand::prelude::{Rng, SeedableRng, SmallRng}; + +const OPERATORS: &[char] = &[ + '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', +]; + +pub struct Rand(SmallRng); + +impl Rand { + pub fn new(seed: usize) -> Self { + Rand(SmallRng::seed_from_u64(seed as u64)) + } + + pub fn unsigned(&mut self, max: usize) -> usize { + self.0.gen_range(0, max + 1) + } + + pub fn words(&mut self, max_count: usize) -> Vec { + let mut result = Vec::new(); + let word_count = self.unsigned(max_count); + for i in 0..word_count { + if i > 0 { + if self.unsigned(5) == 0 { + result.push('\n' as u8); + } else { + result.push(' ' as u8); + } + } + if self.unsigned(3) == 0 { + let index = self.unsigned(OPERATORS.len() - 1); + result.push(OPERATORS[index] as u8); + } else { + for _ in 0..self.unsigned(8) { + result.push(self.0.sample(Alphanumeric) as u8); + } + } + } + result + } +} diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 8143fd6b..1f29e28a 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -18,6 +18,7 @@ use std::io::{self, Read, Seek}; use 
std::marker::PhantomData; use std::os::raw::{c_char, c_void}; use std::ptr; +use std::slice; use std::str; use std::u16; @@ -427,6 +428,18 @@ impl Tree { ) -> TreePropertyCursor<'a, P> { TreePropertyCursor::new(self, property_sheet, source) } + + pub fn changed_ranges(&self, other: &Tree) -> Vec { + unsafe { + let mut count = 0; + let ptr = + ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32); + let ranges = slice::from_raw_parts(ptr, count); + let result = ranges.into_iter().map(|r| r.clone().into()).collect(); + free(ptr as *mut c_void); + result + } + } } unsafe impl Send for Tree {} @@ -558,10 +571,6 @@ impl<'tree> Node<'tree> { } pub fn to_sexp(&self) -> String { - extern "C" { - fn free(pointer: *mut c_void); - } - let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) } .to_str() @@ -788,6 +797,17 @@ impl Into for Range { } } +impl From for Range { + fn from(range: ffi::TSRange) -> Self { + Self { + start_byte: range.start_byte as usize, + end_byte: range.end_byte as usize, + start_point: range.start_point.into(), + end_point: range.end_point.into(), + } + } +} + impl

PropertySheet

{ pub fn new(language: Language, json: &str) -> Result where @@ -860,3 +880,7 @@ impl

PropertySheet

{ }) } } + +extern "C" { + fn free(pointer: *mut c_void); +} diff --git a/lib/src/parser.c b/lib/src/parser.c index c2ebfeeb..56326feb 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -28,10 +28,10 @@ fputs("\n\n", self->dot_graph_file); \ } -#define LOG_TREE() \ - if (self->dot_graph_file) { \ - ts_subtree_print_dot_graph(self->finished_tree, self->language, self->dot_graph_file); \ - fputs("\n", self->dot_graph_file); \ +#define LOG_TREE(tree) \ + if (self->dot_graph_file) { \ + ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \ + fputs("\n", self->dot_graph_file); \ } #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) @@ -417,6 +417,13 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa parse_state, self->language ); + + LOG( + "lexed_lookahead sym:%s, size:%u, character:'%c'", + SYM_NAME(ts_subtree_symbol(result)), + ts_subtree_total_size(result).bytes, + first_error_character + ); } else { if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) { self->lexer.token_start_position = self->lexer.token_end_position; @@ -467,13 +474,14 @@ static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId pa length ); } + + LOG( + "lexed_lookahead sym:%s, size:%u", + SYM_NAME(ts_subtree_symbol(result)), + ts_subtree_total_size(result).bytes + ); } - LOG( - "lexed_lookahead sym:%s, size:%u", - SYM_NAME(ts_subtree_symbol(result)), - ts_subtree_total_size(result).bytes - ); return result; } @@ -1623,6 +1631,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { ); reusable_node_reset(&self->reusable_node, old_tree->root); LOG("parse_after_edit"); + LOG_TREE(self->old_tree); for (unsigned i = 0; i < self->included_range_differences.size; i++) { TSRange *range = &self->included_range_differences.contents[i]; LOG("different_included_range %u - %u", range->start_byte, range->end_byte); @@ -1681,7 +1690,7 @@ TSTree 
*ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language); LOG("done"); - LOG_TREE(); + LOG_TREE(self->finished_tree); TSTree *result = ts_tree_new( self->finished_tree, diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 3e353f99..00af7507 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -879,7 +879,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, "error-cost: %u\n" "has-changes: %u\n" "repeat-depth: %u\n" - "lookahead-bytes: %u\"]\n", + "lookahead-bytes: %u", start_offset, end_offset, ts_subtree_parse_state(*self), ts_subtree_error_cost(*self), @@ -888,6 +888,12 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, ts_subtree_lookahead_bytes(*self) ); + if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) { + fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); + } + + fprintf(f, "\"]\n"); + uint32_t child_start_offset = start_offset; uint32_t structural_child_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence( diff --git a/script/test b/script/test index 43c274f7..ba95a754 100755 --- a/script/test +++ b/script/test @@ -10,19 +10,23 @@ USAGE OPTIONS - -h print this message + -h Print this message -a Compile C code with the Clang static analyzer - -l run only the corpus tests for the given language + -l Run only the corpus tests for the given language - -e run only the corpus tests whose name contain the given string + -e Run only the corpus tests whose name contain the given string - -s set the seed used to control random behavior + -t Run only the given trial number of randomized test - -d print parsing log to stderr + -s Set the seed used to control random behavior - -D pipe tests' stderr to \`dot(1)\` to render an SVG log + -d Print parsing log to stderr + + -D Generate an SVG graph of parsing logs + + -g Run the tests with a debugger EOF } @@ -31,7 +35,9 
@@ export TREE_SITTER_TEST=1 export RUST_TEST_THREADS=1 export RUST_BACKTRACE=full -while getopts "bdl:e:s:gGhpvD" option; do +mode=normal + +while getopts "dDghl:e:s:t:" option; do case ${option} in h) usage @@ -43,22 +49,35 @@ while getopts "bdl:e:s:gGhpvD" option; do e) export TREE_SITTER_TEST_EXAMPLE_FILTER=${OPTARG} ;; + t) + export TREE_SITTER_TEST_TRIAL_FILTER=${OPTARG} + ;; s) - export TREE_SITTER_SEED=${OPTARG} + export TREE_SITTER_TEST_SEED=${OPTARG} ;; d) - export TREE_SITTER_ENABLE_LOG=1 + export TREE_SITTER_TEST_ENABLE_LOG=1 ;; D) - export TREE_SITTER_ENABLE_LOG_GRAPHS=1 + export TREE_SITTER_TEST_ENABLE_LOG_GRAPHS=1 + ;; + g) + mode=debug ;; esac done -if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER ]]; then +shift $(expr $OPTIND - 1 ) + +if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then top_level_filter=corpus else top_level_filter=$1 fi -cargo test --jobs 1 $top_level_filter -- --nocapture +if [[ "${mode}" == "debug" ]]; then + test_binary=$(cargo test --no-run --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]') + lldb "${test_binary}" -- $top_level_filter +else + cargo test --jobs 1 $top_level_filter -- --nocapture +fi From 5a12fbd927dfff6b44aace2e6ee4c7da1d018d71 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 25 Jan 2019 15:20:34 -0800 Subject: [PATCH 184/208] Verify changed ranges in randomized tests --- cli/src/tests/corpuses.rs | 33 ++++++++--- cli/src/tests/mod.rs | 1 + cli/src/tests/scope_sequence.rs | 97 +++++++++++++++++++++++++++++++++ lib/binding/helper.c | 17 ++++++ lib/binding/lib.rs | 7 ++- lib/build.rs | 1 + script/test.cmd | 2 +- 7 files changed, 146 insertions(+), 12 deletions(-) create mode 100644 cli/src/tests/scope_sequence.rs create mode 100644 lib/binding/helper.c diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpuses.rs index 76ed02d0..9b60d685 100644 --- a/cli/src/tests/corpuses.rs +++ 
b/cli/src/tests/corpuses.rs @@ -1,6 +1,7 @@ use super::allocations; use super::fixtures::{fixtures_dir, get_language, get_test_language}; use super::random::Rand; +use super::scope_sequence::ScopeSequence; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; @@ -125,7 +126,11 @@ fn test_real_language_corpus_files() { // Check that the new tree is consistent. check_consistent_sizes(&tree2, &input); - check_changed_ranges(&tree, &tree2, &input); + if let Err(message) = check_changed_ranges(&tree, &tree2, &input) { + println!("\nUnexpected scope change in trial {}\n{}\n\n", trial, message); + failure_count += 1; + break; + } // Undo all of the edits and re-parse again. while let Some(edit) = undo_stack.pop() { @@ -139,19 +144,26 @@ fn test_real_language_corpus_files() { .parse_utf8(&mut |i, _| input.get(i..).unwrap_or(&[]), Some(&tree2)) .unwrap(); - // Check that the edited tree is consistent. - check_consistent_sizes(&tree3, &input); - check_changed_ranges(&tree2, &tree3, &input); - // Verify that the final tree matches the expectation from the corpus. let actual_output = tree3.root_node().to_sexp(); if actual_output != expected_output { - println!("Incorrect parse for {} - {} - trial {}", language_name, example_name, trial); + println!( + "Incorrect parse for {} - {} - trial {}", + language_name, example_name, trial + ); print_diff_key(); print_diff(&actual_output, &expected_output); println!(""); failure_count += 1; - // break; + break; + } + + // Check that the edited tree is consistent. 
+ check_consistent_sizes(&tree3, &input); + if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) { + eprintln!("Unexpected scope change in trial {}\n{}\n\n", trial, message); + failure_count += 1; + break; } drop(tree); @@ -348,7 +360,12 @@ fn position_for_offset(input: &Vec, offset: usize) -> Point { fn check_consistent_sizes(tree: &Tree, input: &Vec) {} -fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec) {} +fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec) -> Result<(), String> { + let changed_ranges = old_tree.changed_ranges(new_tree); + let old_scope_sequence = ScopeSequence::new(old_tree); + let new_scope_sequence = ScopeSequence::new(new_tree); + old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges) +} fn get_parser(session: &mut Option, log_filename: &str) -> Parser { let mut parser = Parser::new(); diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 3a0c607a..fa841382 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,3 +3,4 @@ mod corpuses; mod fixtures; mod random; mod parser_api; +mod scope_sequence; diff --git a/cli/src/tests/scope_sequence.rs b/cli/src/tests/scope_sequence.rs new file mode 100644 index 00000000..685fe91f --- /dev/null +++ b/cli/src/tests/scope_sequence.rs @@ -0,0 +1,97 @@ +use tree_sitter::{Point, Range, Tree}; + +#[derive(Debug)] +pub struct ScopeSequence(Vec); + +type ScopeStack = Vec<&'static str>; + +impl ScopeSequence { + pub fn new(tree: &Tree) -> Self { + let mut result = ScopeSequence(Vec::new()); + let mut scope_stack = Vec::new(); + + let mut cursor = tree.walk(); + let mut visited_children = false; + loop { + let node = cursor.node(); + for _ in result.0.len()..node.start_byte() { + result.0.push(scope_stack.clone()); + } + if visited_children { + for _ in result.0.len()..node.end_byte() { + result.0.push(scope_stack.clone()); + } + scope_stack.pop(); + if cursor.goto_next_sibling() { + visited_children = false; + } 
else if !cursor.goto_parent() { + break; + } + } else { + scope_stack.push(cursor.node().kind()); + if !cursor.goto_first_child() { + visited_children = true; + } + } + } + + result + } + + pub fn check_changes( + &self, + other: &ScopeSequence, + text: &Vec, + known_changed_ranges: &Vec, + ) -> Result<(), String> { + if self.0.len() != text.len() { + panic!( + "Inconsistent scope sequence: {:?}", + self.0.iter().zip(text.iter().map(|c| *c as char)).collect::>() + ); + } + + assert_eq!(self.0.len(), other.0.len()); + let mut position = Point { row: 0, column: 0 }; + for (i, stack) in self.0.iter().enumerate() { + let other_stack = &other.0[i]; + if *stack != *other_stack { + let containing_range = known_changed_ranges + .iter() + .find(|range| range.start_point <= position && position < range.end_point); + if containing_range.is_none() { + let line = &text[(i - position.column)..] + .split(|c| *c == '\n' as u8) + .next() + .unwrap(); + return Err(format!( + concat!( + "Position: {}\n", + "Byte offset: {}\n", + "Line: {}\n", + "{}^\n", + "Old scopes: {:?}\n", + "New scopes: {:?}\n", + "Invalidated ranges: {:?}", + ), + position, + i, + String::from_utf8_lossy(line), + String::from(" ").repeat(position.column + "Line: ".len()), + stack, + other_stack, + known_changed_ranges, + )); + } + } + + if text[i] == '\n' as u8 { + position.row += 1; + position.column = 0; + } else { + position.column += 1; + } + } + Ok(()) + } +} diff --git a/lib/binding/helper.c b/lib/binding/helper.c new file mode 100644 index 00000000..4275e445 --- /dev/null +++ b/lib/binding/helper.c @@ -0,0 +1,17 @@ +#if defined(TREE_SITTER_TEST) + +void ts_record_free(void *); + +void rust_tree_sitter_free(void *p) { + ts_record_free(p); +} + +#else + +void free(void *); + +void rust_tree_sitter_free(void *p) { + free(p); +} + +#endif diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 1f29e28a..150dfcf4 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -436,7 +436,7 @@ impl Tree { 
ffi::ts_tree_get_changed_ranges(self.0, other.0, &mut count as *mut _ as *mut u32); let ranges = slice::from_raw_parts(ptr, count); let result = ranges.into_iter().map(|r| r.clone().into()).collect(); - free(ptr as *mut c_void); + free_ptr(ptr as *mut c_void); result } } @@ -576,7 +576,7 @@ impl<'tree> Node<'tree> { .to_str() .unwrap() .to_string(); - unsafe { free(c_string as *mut c_void) }; + unsafe { free_ptr(c_string as *mut c_void) }; result } @@ -882,5 +882,6 @@ impl

PropertySheet

{ } extern "C" { - fn free(pointer: *mut c_void); + #[link_name = "rust_tree_sitter_free"] + fn free_ptr(ptr: *mut c_void); } diff --git a/lib/build.rs b/lib/build.rs index df66ee7c..7cca001c 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -41,6 +41,7 @@ fn main() { .include("include") .include("utf8proc") .file(src_path.join("lib.c")) + .file(Path::new("binding").join("helper.c")) .compile("tree-sitter"); } diff --git a/script/test.cmd b/script/test.cmd index e62eed0e..ef4ce02e 100644 --- a/script/test.cmd +++ b/script/test.cmd @@ -4,4 +4,4 @@ set TREE_SITTER_TEST=1 set RUST_TEST_THREADS=1 set RUST_BACKTRACE=full -cargo test "%~1" +cargo test "%~1" -- --nocapture From af83e8034e05b4aa00512128828bdd1b4046dffc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 25 Jan 2019 16:40:26 -0800 Subject: [PATCH 185/208] Move test helpers into their own folder --- cli/src/tests/{corpuses.rs => corpus_test.rs} | 18 ++++++++++++------ cli/src/tests/{ => helpers}/allocations.rs | 0 cli/src/tests/{ => helpers}/fixtures.rs | 0 cli/src/tests/helpers/mod.rs | 4 ++++ cli/src/tests/{ => helpers}/random.rs | 0 cli/src/tests/{ => helpers}/scope_sequence.rs | 0 cli/src/tests/mod.rs | 9 +++------ .../{parser_api.rs => parser_api_test.rs} | 4 ++-- 8 files changed, 21 insertions(+), 14 deletions(-) rename cli/src/tests/{corpuses.rs => corpus_test.rs} (96%) rename cli/src/tests/{ => helpers}/allocations.rs (100%) rename cli/src/tests/{ => helpers}/fixtures.rs (100%) create mode 100644 cli/src/tests/helpers/mod.rs rename cli/src/tests/{ => helpers}/random.rs (100%) rename cli/src/tests/{ => helpers}/scope_sequence.rs (100%) rename cli/src/tests/{parser_api.rs => parser_api_test.rs} (99%) diff --git a/cli/src/tests/corpuses.rs b/cli/src/tests/corpus_test.rs similarity index 96% rename from cli/src/tests/corpuses.rs rename to cli/src/tests/corpus_test.rs index 9b60d685..587a3752 100644 --- a/cli/src/tests/corpuses.rs +++ b/cli/src/tests/corpus_test.rs @@ -1,7 +1,7 @@ -use 
super::allocations; -use super::fixtures::{fixtures_dir, get_language, get_test_language}; -use super::random::Rand; -use super::scope_sequence::ScopeSequence; +use super::helpers::allocations; +use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language}; +use super::helpers::random::Rand; +use super::helpers::scope_sequence::ScopeSequence; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; @@ -127,7 +127,10 @@ fn test_real_language_corpus_files() { // Check that the new tree is consistent. check_consistent_sizes(&tree2, &input); if let Err(message) = check_changed_ranges(&tree, &tree2, &input) { - println!("\nUnexpected scope change in trial {}\n{}\n\n", trial, message); + println!( + "\nUnexpected scope change in trial {}\n{}\n\n", + trial, message + ); failure_count += 1; break; } @@ -161,7 +164,10 @@ fn test_real_language_corpus_files() { // Check that the edited tree is consistent. check_consistent_sizes(&tree3, &input); if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) { - eprintln!("Unexpected scope change in trial {}\n{}\n\n", trial, message); + eprintln!( + "Unexpected scope change in trial {}\n{}\n\n", + trial, message + ); failure_count += 1; break; } diff --git a/cli/src/tests/allocations.rs b/cli/src/tests/helpers/allocations.rs similarity index 100% rename from cli/src/tests/allocations.rs rename to cli/src/tests/helpers/allocations.rs diff --git a/cli/src/tests/fixtures.rs b/cli/src/tests/helpers/fixtures.rs similarity index 100% rename from cli/src/tests/fixtures.rs rename to cli/src/tests/helpers/fixtures.rs diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs new file mode 100644 index 00000000..bd5c6517 --- /dev/null +++ b/cli/src/tests/helpers/mod.rs @@ -0,0 +1,4 @@ +pub(super) mod allocations; +pub(super) mod fixtures; +pub(super) mod random; +pub(super) mod scope_sequence; diff --git a/cli/src/tests/random.rs 
b/cli/src/tests/helpers/random.rs similarity index 100% rename from cli/src/tests/random.rs rename to cli/src/tests/helpers/random.rs diff --git a/cli/src/tests/scope_sequence.rs b/cli/src/tests/helpers/scope_sequence.rs similarity index 100% rename from cli/src/tests/scope_sequence.rs rename to cli/src/tests/helpers/scope_sequence.rs diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index fa841382..beafa172 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,6 +1,3 @@ -mod allocations; -mod corpuses; -mod fixtures; -mod random; -mod parser_api; -mod scope_sequence; +mod corpus_test; +mod helpers; +mod parser_api_test; diff --git a/cli/src/tests/parser_api.rs b/cli/src/tests/parser_api_test.rs similarity index 99% rename from cli/src/tests/parser_api.rs rename to cli/src/tests/parser_api_test.rs index 38bc0b69..9584ac4e 100644 --- a/cli/src/tests/parser_api.rs +++ b/cli/src/tests/parser_api_test.rs @@ -1,4 +1,4 @@ -use super::fixtures::get_language; +use super::helpers::fixtures::get_language; use std::thread; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet}; @@ -453,7 +453,7 @@ fn test_editing() { fn test_parallel_parsing() { // Parse this source file so that each thread has a non-trivial amount of // work to do. 
- let this_file_source = include_str!("parser_api.rs"); + let this_file_source = include_str!("parser_api_test.rs"); let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); From 5927e104c2d8a0dada185cda8f6c6625e0bcf6fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 26 Jan 2019 22:22:29 -0800 Subject: [PATCH 186/208] Check tree consistency in randomized tests --- cli/src/tests/corpus_test.rs | 54 ++++++++++++++++++++++++++++++++++-- script/test | 2 +- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 587a3752..449669e3 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -6,7 +6,7 @@ use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; use std::{env, fs, time, usize}; -use tree_sitter::{InputEdit, LogType, Parser, Point, Tree}; +use tree_sitter::{InputEdit, LogType, Node, Parser, Point, Tree}; const EDIT_COUNT: usize = 3; const TRIAL_COUNT: usize = 10; @@ -364,7 +364,57 @@ fn position_for_offset(input: &Vec, offset: usize) -> Point { result } -fn check_consistent_sizes(tree: &Tree, input: &Vec) {} +fn check_consistent_sizes(tree: &Tree, input: &Vec) { + fn check(node: Node, line_offsets: &Vec) { + let start_byte = node.start_byte(); + let end_byte = node.end_byte(); + let start_point = node.start_position(); + let end_point = node.end_position(); + + assert!(start_byte <= end_byte); + assert!(start_point <= end_point); + assert_eq!(start_byte, line_offsets[start_point.row] + start_point.column); + assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column); + + let mut last_child_end_byte = start_byte; + let mut last_child_end_point = start_point; + let mut some_child_has_changes = false; + let mut actual_named_child_count = 0; + for child in node.children() { + assert!(child.start_byte() >= last_child_end_byte); + assert!(child.start_position() >= last_child_end_point); + 
check(child, line_offsets); + if child.has_changes() { + some_child_has_changes = true; + } + if child.is_named() { + actual_named_child_count += 1; + } + last_child_end_byte = child.end_byte(); + last_child_end_point = child.end_position(); + } + + assert_eq!(actual_named_child_count, node.named_child_count()); + + if node.child_count() > 0 { + assert!(end_byte >= last_child_end_byte); + assert!(end_point >= last_child_end_point); + } + + if some_child_has_changes { + assert!(node.has_changes()); + } + } + + let mut line_offsets = vec![0]; + for (i, c) in input.iter().enumerate() { + if *c == '\n' as u8 { + line_offsets.push(i + 1); + } + } + + check(tree.root_node(), &line_offsets); +} fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec) -> Result<(), String> { let changed_ranges = old_tree.changed_ranges(new_tree); diff --git a/script/test b/script/test index ba95a754..09cf9f83 100755 --- a/script/test +++ b/script/test @@ -6,7 +6,7 @@ function usage { cat <<-EOF USAGE - $0 [-dgGhv] [-f focus-string] [-s seed] + $0 [-adDg] [-s SEED] [-l LANGUAGE] [-e EXAMPLE] [-t TRIAL] OPTIONS From 6d8ef48dad886aa38ada6b0273a901197b8c55c5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 27 Jan 2019 09:53:49 -0800 Subject: [PATCH 187/208] Make test subcommand exit 1 if tests fail --- cli/src/main.rs | 4 +++- cli/src/test.rs | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index aaf45cb1..0bf4f01a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -121,7 +121,9 @@ fn run() -> error::Result<()> { let filter = matches.value_of("filter"); let corpus_path = current_dir.join("corpus"); if let Some(language) = loader.language_at_path(¤t_dir)? { - test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)?; + if !test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)? 
{ + exit(1); + } } else { eprintln!("No language found"); } diff --git a/cli/src/test.rs b/cli/src/test.rs index b8b78b8f..d6a2a7ce 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -48,7 +48,7 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, -) -> Result<()> { +) -> Result { let test_entry = parse_tests(path)?; let mut _log_session = None; let mut parser = Parser::new(); @@ -86,9 +86,10 @@ pub fn run_tests_at_path( println!("\n {}. {}:", i + 1, name); print_diff(actual, expected); } + Ok(true) + } else { + Ok(false) } - - Ok(()) } pub fn print_diff_key() { From 8e198016d86e55730676a156155cd42025296540 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 27 Jan 2019 21:17:31 -0800 Subject: [PATCH 188/208] Try to make build-fuzzers script work w/ new structure & build process --- script/build-fuzzers | 11 ++++------- script/build-lib | 1 + 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/script/build-fuzzers b/script/build-fuzzers index 2c48f6c9..0a19bc4c 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -20,11 +20,8 @@ default_fuzz_flags="-fsanitize=address,undefined -fsanitize-coverage=trace-pc-gu CFLAGS=${CFLAGS:-"$default_fuzz_flags"} CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"} -CC=$CC CXX=$CXX LINK=$LINK CFLAGS=$CFLAGS CXXFLAGS=$CXXFLAGS ./script/configure - -export BUILDTYPE=Fuzz -make runtime - +export CFLAGS +script/build-lib if [ -z "$@" ]; then languages=$(ls test/fixtures/grammars) @@ -56,9 +53,9 @@ for lang in ${languages[@]}; do modes=(true halt false recover) for i in 0 2; do - $CXX $CXXFLAGS -std=c++11 -Iinclude -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="tree_sitter_$lang" \ + $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="tree_sitter_$lang" \ "test/fuzz/fuzzer.cc" "${objects[@]}" \ - out/Fuzz/obj.target/libruntime.a "$LIB_FUZZER_PATH" \ + libtree-sitter.a "$LIB_FUZZER_PATH" \ -o "out/${lang}_fuzzer_${modes[i+1]}" done diff --git 
a/script/build-lib b/script/build-lib index b81a4b0a..be287db0 100755 --- a/script/build-lib +++ b/script/build-lib @@ -11,6 +11,7 @@ ${CC} \ -c \ -O3 \ -std=c99 \ + $CFLAGS \ -I lib/src \ -I lib/include \ -I lib/utf8proc \ From f6d014f3f4c302a16a14a6afc036d31c8cd6605b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 28 Jan 2019 14:23:41 -0800 Subject: [PATCH 189/208] Write tree_sitter/parser.h file in generate command --- cli/src/generate/mod.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 535f9d19..9e954298 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -40,7 +40,9 @@ pub fn generate_parser_in_directory( let (language_name, c_code) = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; let repo_src_path = repo_path.join("src"); + let repo_header_path = repo_src_path.join("tree_sitter"); fs::create_dir_all(&repo_src_path)?; + fs::create_dir_all(&repo_header_path)?; fs::write(&repo_src_path.join("parser.c"), c_code) .map_err(|e| format!("Failed to write parser.c: {}", e))?; ensure_file(&repo_src_path.join("binding.cc"), || { @@ -52,6 +54,9 @@ pub fn generate_parser_in_directory( ensure_file(&repo_path.join("index.js"), || { npm_files::index_js(&language_name) })?; + ensure_file(&repo_header_path.join("parser.h"), || { + include_str!("../../../lib/include/tree_sitter/parser.h") + })?; } properties::generate_property_sheets(repo_path)?; Ok(()) @@ -96,7 +101,10 @@ fn load_grammar_file(grammar_path: &PathBuf) -> Result { match grammar_path.extension().and_then(|e| e.to_str()) { Some("js") => Ok(load_js_grammar_file(grammar_path)?), Some("json") => Ok(fs::read_to_string(grammar_path)?), - _ => Err(Error(format!("Unknown grammar file extension: {:?}", grammar_path))), + _ => Err(Error(format!( + "Unknown grammar file extension: {:?}", + grammar_path + ))), } } @@ -129,10 +137,10 @@ fn load_js_grammar_file(grammar_path: 
&PathBuf) -> Result { Ok(String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")) } -fn ensure_file(path: &PathBuf, f: impl Fn() -> String) -> Result<()> { +fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> { if path.exists() { Ok(()) } else { - fs::write(path, f()).map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) + fs::write(path, f().as_ref()).map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) } } From 213ccfd3a47b4e0ce0f52d2db3b1875b4ef37998 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 29 Jan 2019 15:30:13 -0800 Subject: [PATCH 190/208] Update trees' metadata bits when setting symbol back to word token --- lib/src/parser.c | 2 +- lib/src/subtree.c | 18 ++++++++++++++++++ lib/src/subtree.h | 10 +--------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 56326feb..85452f8d 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1363,7 +1363,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ); MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token); + ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); lookahead = ts_subtree_from_mut(mutable_lookahead); continue; } diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 00af7507..6ca00792 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -225,6 +225,24 @@ Subtree ts_subtree_new_leaf( } } +void ts_subtree_set_symbol( + MutableSubtree *self, + TSSymbol symbol, + const TSLanguage *language +) { + TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); + if (self->data.is_inline) { + assert(symbol < UINT8_MAX); + self->data.symbol = symbol; + self->data.named = metadata.named; + self->data.visible = metadata.visible; + } else { + self->ptr->symbol = symbol; + 
self->ptr->named = metadata.named; + self->ptr->visible = metadata.visible; + } +} + Subtree ts_subtree_new_error( SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 039494b5..b0423afb 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -132,6 +132,7 @@ void ts_subtree_retain(Subtree); void ts_subtree_release(SubtreePool *, Subtree); bool ts_subtree_eq(Subtree, Subtree); int ts_subtree_compare(Subtree, Subtree); +void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *); void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); @@ -154,15 +155,6 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE #undef SUBTREE_GET -static inline void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol) { - if (self->data.is_inline) { - assert(symbol < UINT8_MAX); - self->data.symbol = symbol; - } else { - self->ptr->symbol = symbol; - } -} - static inline void ts_subtree_set_extra(MutableSubtree *self) { if (self->data.is_inline) { self->data.extra = true; From d192eda9cf103f88468acdf40dc77114d30bde67 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 30 Jan 2019 21:43:44 -0800 Subject: [PATCH 191/208] Remove stray word 'runtime' from comment --- lib/src/lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/src/lib.c b/lib/src/lib.c index b29f5214..49ddf672 100644 --- a/lib/src/lib.c +++ b/lib/src/lib.c @@ -1,5 +1,4 @@ -// The Tree-sitter runtime library can be built by compiling this -// one source file. +// The Tree-sitter library can be built by compiling this one source file. 
// // The following directories must be added to the include path: // - src From e26cbb62a580dfd5a5be178b4b5e7ba4fb98dc5b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 31 Jan 2019 08:15:30 -0800 Subject: [PATCH 192/208] Add Tree::edit unit tests --- cli/src/tests/mod.rs | 1 + cli/src/tests/tree_test.rs | 191 +++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 cli/src/tests/tree_test.rs diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index beafa172..3641cc3e 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,3 +1,4 @@ mod corpus_test; mod helpers; mod parser_api_test; +mod tree_test; diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs new file mode 100644 index 00000000..401ff03a --- /dev/null +++ b/cli/src/tests/tree_test.rs @@ -0,0 +1,191 @@ +use super::helpers::fixtures::get_language; +use tree_sitter::{InputEdit, Language, Parser, Point}; + +#[test] +fn test_edit() { + let mut parser = Parser::new(); + parser.set_language(javascript()).unwrap(); + let tree = parser.parse_str(" abc !== def", None).unwrap(); + + assert_eq!( + tree.root_node().to_sexp(), + "(program (expression_statement (binary_expression (identifier) (identifier))))" + ); + + // edit entirely within the tree's padding: + // resize the padding of the tree and its leftmost descendants. 
+ { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 1, + old_end_byte: 1, + new_end_byte: 2, + start_position: Point::new(0, 1), + old_end_position: Point::new(0, 1), + new_end_position: Point::new(0, 2), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + + assert!(expr.has_changes()); + assert_eq!(expr.start_byte(), 3); + assert_eq!(expr.end_byte(), 16); + assert!(child1.has_changes()); + assert_eq!(child1.start_byte(), 3); + assert_eq!(child1.end_byte(), 6); + assert!(!child2.has_changes()); + assert_eq!(child2.start_byte(), 8); + assert_eq!(child2.end_byte(), 11); + } + + // edit starting in the tree's padding but extending into its content: + // shrink the content to compenstate for the expanded padding. + { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 1, + old_end_byte: 4, + new_end_byte: 5, + start_position: Point::new(0, 1), + old_end_position: Point::new(0, 5), + new_end_position: Point::new(0, 5), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + + assert!(expr.has_changes()); + assert_eq!(expr.start_byte(), 5); + assert_eq!(expr.end_byte(), 16); + assert!(child1.has_changes()); + assert_eq!(child1.start_byte(), 5); + assert_eq!(child1.end_byte(), 6); + assert!(!child2.has_changes()); + assert_eq!(child2.start_byte(), 8); + assert_eq!(child2.end_byte(), 11); + } + + // insertion at the edge of a tree's padding: + // expand the tree's padding. 
+ { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 2, + old_end_byte: 2, + new_end_byte: 4, + start_position: Point::new(0, 2), + old_end_position: Point::new(0, 2), + new_end_position: Point::new(0, 4), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + + assert!(expr.has_changes()); + assert_eq!(expr.start_byte(), 4); + assert_eq!(expr.end_byte(), 17); + assert!(child1.has_changes()); + assert_eq!(child1.start_byte(), 4); + assert_eq!(child1.end_byte(), 7); + assert!(!child2.has_changes()); + assert_eq!(child2.start_byte(), 9); + assert_eq!(child2.end_byte(), 12); + } + + // replacement starting at the edge of the tree's padding: + // resize the content and not the padding. + { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 2, + old_end_byte: 2, + new_end_byte: 4, + start_position: Point::new(0, 2), + old_end_position: Point::new(0, 2), + new_end_position: Point::new(0, 4), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + + assert!(expr.has_changes()); + assert_eq!(expr.start_byte(), 4); + assert_eq!(expr.end_byte(), 17); + assert!(child1.has_changes()); + assert_eq!(child1.start_byte(), 4); + assert_eq!(child1.end_byte(), 7); + assert!(!child2.has_changes()); + assert_eq!(child2.start_byte(), 9); + assert_eq!(child2.end_byte(), 12); + } + + // deletion that spans more than one child node: + // shrink subsequent child nodes. 
+ { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 1, + old_end_byte: 11, + new_end_byte: 4, + start_position: Point::new(0, 1), + old_end_position: Point::new(0, 11), + new_end_position: Point::new(0, 4), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + let child3 = expr.child(2).unwrap(); + + assert!(expr.has_changes()); + assert_eq!(expr.start_byte(), 4); + assert_eq!(expr.end_byte(), 8); + assert!(child1.has_changes()); + assert_eq!(child1.start_byte(), 4); + assert_eq!(child1.end_byte(), 4); + assert!(child2.has_changes()); + assert_eq!(child2.start_byte(), 4); + assert_eq!(child2.end_byte(), 4); + assert!(child3.has_changes()); + assert_eq!(child3.start_byte(), 5); + assert_eq!(child3.end_byte(), 8); + } + + // insertion at the end of the tree: + // extend the tree's content. + { + let mut tree = tree.clone(); + tree.edit(&InputEdit { + start_byte: 15, + old_end_byte: 15, + new_end_byte: 16, + start_position: Point::new(0, 15), + old_end_position: Point::new(0, 15), + new_end_position: Point::new(0, 16), + }); + + let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); + let child1 = expr.child(0).unwrap(); + let child2 = expr.child(1).unwrap(); + let child3 = expr.child(2).unwrap(); + + assert!(expr.has_changes()); + assert_eq!(expr.start_byte(), 2); + assert_eq!(expr.end_byte(), 16); + assert!(!child1.has_changes()); + assert_eq!(child1.end_byte(), 5); + assert!(!child2.has_changes()); + assert_eq!(child2.end_byte(), 10); + assert!(child3.has_changes()); + assert_eq!(child3.end_byte(), 16); + } +} + +fn javascript() -> Language { + get_language("javascript") +} From 4cac85fec4ce5d01e468c0f0fa34bdad4187d882 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Feb 2019 14:39:37 -0800 Subject: [PATCH 193/208] Add benchmark script * Structure `cli` crate as both a library and an executable, so that benchmarks can import 
code from the crate. * Import macros in the Rust 2018 style. --- .appveyor.yml | 2 +- .travis.yml | 1 + cli/Cargo.toml | 4 + cli/benches/benchmark.rs | 172 ++++++++++++++++++ .../generate/build_tables/build_lex_table.rs | 1 + cli/src/generate/build_tables/item.rs | 1 + .../build_tables/minimize_parse_table.rs | 1 + cli/src/generate/build_tables/mod.rs | 1 + cli/src/generate/mod.rs | 5 +- cli/src/generate/parse_grammar.rs | 1 + .../generate/prepare_grammar/expand_tokens.rs | 1 + cli/src/generate/properties.rs | 2 + cli/src/lib.rs | 10 + cli/src/loader.rs | 1 + cli/src/logger.rs | 2 +- cli/src/main.rs | 25 +-- cli/src/test.rs | 6 +- cli/src/tests/corpus_test.rs | 6 +- cli/src/tests/helpers/allocations.rs | 6 +- cli/src/tests/helpers/dirs.rs | 11 ++ cli/src/tests/helpers/fixtures.rs | 12 +- cli/src/tests/parser_api_test.rs | 1 + cli/src/util.rs | 8 +- script/benchmark | 53 +----- script/benchmark.cmd | 3 + 25 files changed, 244 insertions(+), 92 deletions(-) create mode 100644 cli/benches/benchmark.rs create mode 100644 cli/src/lib.rs create mode 100644 cli/src/tests/helpers/dirs.rs create mode 100644 script/benchmark.cmd diff --git a/.appveyor.yml b/.appveyor.yml index de82a7d5..610ac134 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -25,8 +25,8 @@ test_script: - script\regenerate-fixtures.cmd # Run tests - - set TREE_SITTER_TEST=1 - script\test.cmd + - script\benchmark.cmd before_deploy: - move target\release\tree-sitter.exe tree-sitter.exe diff --git a/.travis.yml b/.travis.yml index 722a4dc9..06c71b34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ script: # Run tests - export TREE_SITTER_STATIC_ANALYSIS=1 - script/test + - script/benchmark branches: only: diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 75efdb18..35b6c7a0 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -8,6 +8,10 @@ edition = "2018" name = "tree-sitter" path = "src/main.rs" +[[bench]] +name = "benchmark" +harness = false + [dependencies] cc = "1.0" ansi_term = "0.11" diff 
--git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs new file mode 100644 index 00000000..472ab886 --- /dev/null +++ b/cli/benches/benchmark.rs @@ -0,0 +1,172 @@ +use lazy_static::lazy_static; +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; +use std::time::Instant; +use std::{env, fs, usize}; +use tree_sitter::{Language, Parser}; +use tree_sitter_cli::loader::Loader; + +include!("../src/tests/helpers/dirs.rs"); + +lazy_static! { + static ref LANGUAGE_FILTER: Option = + env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok(); + static ref EXAMPLE_FILTER: Option = + env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok(); + static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); + static ref EXAMPLE_PATHS_BY_LANGUAGE_NAME: BTreeMap> = { + let mut result = BTreeMap::new(); + let grammar_dirs = fs::read_dir(&(*GRAMMARS_DIR)).unwrap(); + for grammar_dir in grammar_dirs { + let grammar_dir = grammar_dir.unwrap(); + if !grammar_dir.path().is_dir() { + continue; + } + + let language_name = grammar_dir.file_name(); + let language_name = language_name.to_str().unwrap(); + if let Ok(example_files) = fs::read_dir(&grammar_dir.path().join("examples")) { + result.insert( + language_name.to_string(), + example_files + .filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p) + } else { + None + } + }) + .collect(), + ); + } else { + result.insert(language_name.to_string(), Vec::new()); + } + } + + result + }; +} + +fn main() { + let mut parser = Parser::new(); + let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_NAME + .iter() + .flat_map(|(_, paths)| paths.iter()) + .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count()) + .max() + .unwrap(); + + let mut all_normal_speeds = Vec::new(); + let mut all_error_speeds = Vec::new(); + + for (language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() { + // TODO - remove after fixing slow error parsing HTML. 
+ if language_name == "html" { + continue; + } + + if let Some(filter) = LANGUAGE_FILTER.as_ref() { + if language_name != filter.as_str() { + continue; + } + } + + eprintln!("\nLanguage: {}", language_name); + parser.set_language(get_language(language_name)).unwrap(); + + eprintln!(" Normal examples:"); + let mut normal_speeds = Vec::new(); + for example_path in example_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !example_path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + normal_speeds.push(parse(&mut parser, example_path, max_path_length)); + } + + eprintln!(" Error examples (mismatched languages):"); + let mut error_speeds = Vec::new(); + for (other_language_name, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_NAME.iter() { + if other_language_name != language_name { + for example_path in example_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !example_path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + error_speeds.push(parse(&mut parser, example_path, max_path_length)); + } + } + } + + if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) { + eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); + eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + } + + if let Some((average_error, worst_error)) = aggregate(&error_speeds) { + eprintln!(" Average Speed (errors): {} bytes/ms", average_error); + eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + } + + all_normal_speeds.extend(normal_speeds); + all_error_speeds.extend(error_speeds); + } + + eprintln!("\nOverall"); + if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { + eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); + eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + } + + if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) { + eprintln!(" Average Speed (errors): {} bytes/ms", average_error); + 
eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + } + eprintln!(""); +} + +fn aggregate(speeds: &Vec<(usize)>) -> Option<(usize, usize)> { + if speeds.is_empty() { + return None; + } + let mut total = 0; + let mut max = usize::MAX; + for speed in speeds.iter().cloned() { + total += speed; + if speed < max { + max = speed; + } + } + Some((total / speeds.len(), max)) +} + +fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize { + eprint!( + " {:width$}\t", + example_path.file_name().unwrap().to_str().unwrap(), + width = max_path_length + ); + + let source_code = fs::read(example_path).unwrap(); + let time = Instant::now(); + let _tree = parser + .parse_utf8(&mut |byte, _| &source_code[byte..], None) + .expect("Incompatible language version"); + let duration = time.elapsed(); + let duration_ms = + duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0; + let speed = (source_code.len() as f64 / duration_ms) as usize; + eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); + speed +} + +fn get_language(name: &str) -> Language { + TEST_LOADER + .load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR) + .unwrap() +} diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index 38f56cc3..ef4b3e5e 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -5,6 +5,7 @@ use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor}; use crate::generate::rules::Symbol; use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; +use log::info; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, VecDeque}; diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 9f3307dd..b450bb75 100644 --- 
a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,6 +1,7 @@ use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; use crate::generate::rules::Associativity; use crate::generate::rules::{Symbol, SymbolType}; +use lazy_static::lazy_static; use smallbitvec::SmallBitVec; use std::cmp::Ordering; use std::fmt; diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index bb9b26eb..9b012afe 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -4,6 +4,7 @@ use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use crate::generate::rules::{AliasMap, Symbol}; use crate::generate::tables::{ParseAction, ParseState, ParseTable, ParseTableEntry}; use hashbrown::{HashMap, HashSet}; +use log::info; pub(crate) fn minimize_parse_table( parse_table: &mut ParseTable, diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 36f6770b..df19f9e0 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -17,6 +17,7 @@ use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGram use crate::generate::nfa::{CharacterSet, NfaCursor}; use crate::generate::rules::{AliasMap, Symbol, SymbolType}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; +use log::info; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 9e954298..397fd677 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -3,6 +3,7 @@ use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; use crate::error::{Error, Result}; +use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; use std::fs; use 
std::io::Write; @@ -62,7 +63,6 @@ pub fn generate_parser_in_directory( Ok(()) } -#[cfg(test)] pub fn generate_parser_for_grammar(grammar_json: &String) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) @@ -141,6 +141,7 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> if path.exists() { Ok(()) } else { - fs::write(path, f().as_ref()).map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) + fs::write(path, f().as_ref()) + .map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) } } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index e77dce9b..cf2005ad 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -1,6 +1,7 @@ use super::grammars::{InputGrammar, Variable, VariableType}; use super::rules::Rule; use crate::error::Result; +use serde_derive::Deserialize; use serde_json::{Map, Value}; #[derive(Deserialize)] diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 8e0f12fe..9e2cf9fe 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -3,6 +3,7 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{LexicalGrammar, LexicalVariable}; use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; use crate::generate::rules::Rule; +use lazy_static::lazy_static; use regex::Regex; use regex_syntax::ast::{ parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange, diff --git a/cli/src/generate/properties.rs b/cli/src/generate/properties.rs index 4df4d67d..f5861159 100644 --- a/cli/src/generate/properties.rs +++ b/cli/src/generate/properties.rs @@ -1,6 +1,8 @@ use crate::error::{Error, Result}; +use log::info; use rsass; use rsass::sass::Value; +use 
serde_derive::Serialize; use std::cmp::Ordering; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 00000000..9038b5b8 --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,10 @@ +pub mod error; +pub mod generate; +pub mod loader; +pub mod logger; +pub mod parse; +pub mod test; +pub mod util; + +#[cfg(test)] +mod tests; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 6dd4e4db..6c6d2c5c 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -1,5 +1,6 @@ use libloading::{Library, Symbol}; use regex::{Regex, RegexBuilder}; +use serde_derive::Deserialize; use std::collections::HashMap; use std::fs; use std::io; diff --git a/cli/src/logger.rs b/cli/src/logger.rs index 18df763d..6abe6470 100644 --- a/cli/src/logger.rs +++ b/cli/src/logger.rs @@ -23,7 +23,7 @@ impl Log for Logger { fn flush(&self) {} } -pub(crate) fn init() { +pub fn init() { log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap(); log::set_max_level(LevelFilter::Info); } diff --git a/cli/src/main.rs b/cli/src/main.rs index 0bf4f01a..3c0b057e 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,31 +1,10 @@ -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate log; -#[macro_use] -extern crate serde_derive; -extern crate hashbrown; -extern crate regex; -extern crate rsass; -extern crate serde_json; - -mod error; -mod generate; -mod loader; -mod logger; -mod parse; -mod test; -mod util; - -#[cfg(test)] -mod tests; - -use self::loader::Loader; use clap::{App, AppSettings, Arg, SubCommand}; use std::env; use std::fs; use std::path::Path; use std::process::exit; +use tree_sitter_cli::loader::Loader; +use tree_sitter_cli::{error, generate, logger, parse, test}; use std::usize; fn main() { diff --git a/cli/src/test.rs b/cli/src/test.rs index d6a2a7ce..c8330af9 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -2,6 +2,7 @@ use super::error::Result; use 
super::util; use ansi_term::Colour; use difference::{Changeset, Difference}; +use lazy_static::lazy_static; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; @@ -38,7 +39,10 @@ pub enum TestEntry { impl Default for TestEntry { fn default() -> Self { - TestEntry::Group { name: String::new(), children: Vec::new() } + TestEntry::Group { + name: String::new(), + children: Vec::new(), + } } } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 449669e3..1ee3ddc1 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -5,6 +5,7 @@ use super::helpers::scope_sequence::ScopeSequence; use crate::generate; use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; +use lazy_static::lazy_static; use std::{env, fs, time, usize}; use tree_sitter::{InputEdit, LogType, Node, Parser, Point, Tree}; @@ -373,7 +374,10 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec) { assert!(start_byte <= end_byte); assert!(start_point <= end_point); - assert_eq!(start_byte, line_offsets[start_point.row] + start_point.column); + assert_eq!( + start_byte, + line_offsets[start_point.row] + start_point.column + ); assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column); let mut last_child_end_byte = start_byte; diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs index e3cdae27..ae246c40 100644 --- a/cli/src/tests/helpers/allocations.rs +++ b/cli/src/tests/helpers/allocations.rs @@ -1,6 +1,7 @@ #![cfg(test)] #![allow(dead_code)] +use lazy_static::lazy_static; use spin::Mutex; use std::collections::HashMap; use std::os::raw::{c_ulong, c_void}; @@ -46,10 +47,7 @@ pub fn stop_recording() { .map(|e| e.1) .collect::>(); allocation_indices.sort_unstable(); - panic!( - "Leaked allocation indices: {:?}", - allocation_indices - ); + panic!("Leaked allocation indices: {:?}", allocation_indices); } } diff --git 
a/cli/src/tests/helpers/dirs.rs b/cli/src/tests/helpers/dirs.rs new file mode 100644 index 00000000..4bf345d8 --- /dev/null +++ b/cli/src/tests/helpers/dirs.rs @@ -0,0 +1,11 @@ +lazy_static! { + static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned(); + static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); + static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); + static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); + static ref SCRATCH_DIR: PathBuf = { + let result = ROOT_DIR.join("target").join("scratch"); + fs::create_dir_all(&result).unwrap(); + result + }; +} diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 639b1004..981f0ab6 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -1,18 +1,12 @@ use crate::loader::Loader; +use lazy_static::lazy_static; use std::fs; use std::path::{Path, PathBuf}; use tree_sitter::Language; +include!("./dirs.rs"); + lazy_static! 
{ - static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect(); - static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); - static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); - static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); - static ref SCRATCH_DIR: PathBuf = { - let result = ROOT_DIR.join("target").join("scratch"); - fs::create_dir_all(&result).unwrap(); - result - }; static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); } diff --git a/cli/src/tests/parser_api_test.rs b/cli/src/tests/parser_api_test.rs index 9584ac4e..e46d9b55 100644 --- a/cli/src/tests/parser_api_test.rs +++ b/cli/src/tests/parser_api_test.rs @@ -1,4 +1,5 @@ use super::helpers::fixtures::get_language; +use serde_derive::Deserialize; use std::thread; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet}; diff --git a/cli/src/util.rs b/cli/src/util.rs index 004d3b06..e880bea1 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -8,18 +8,18 @@ use tree_sitter::Parser; const HTML_HEADER: &[u8] = b"\n\n\n"; #[cfg(windows)] -pub(crate) struct LogSession(); +pub struct LogSession(); #[cfg(unix)] -pub(crate) struct LogSession(PathBuf, Option, Option); +pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub(crate) fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { Ok(LogSession()) } #[cfg(unix)] -pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; diff --git a/script/benchmark b/script/benchmark index e24c6b58..9b4ec3f0 100755 --- a/script/benchmark +++ b/script/benchmark @@ -6,7 +6,7 @@ function usage { cat <<-EOF USAGE - $0 [-Ld] [-l language-name] [-f example-file-name] + $0 [-h] [-l 
language-name] [-e example-file-name] OPTIONS @@ -14,63 +14,24 @@ OPTIONS -l run only the benchmarks for the given language - -f run only the benchmarks that parse the file with the given name - - -d run tests in a debugger (either lldb or gdb) - - -L run benchmarks with parse logging turned on - - -b run make under the scan-build static analyzer + -e run only the benchmarks that parse the example file with the given name EOF } -if [ "$(uname -s)" == "Darwin" ]; then - export LINK="clang++ -fsanitize=address" -fi - -mode=normal -export BUILDTYPE=Release -cmd=out/$BUILDTYPE/benchmarks -run_scan_build= - -while getopts "bdhf:l:SL" option; do +while getopts "hl:e:" option; do case ${option} in h) usage exit ;; - d) - mode=debug - ;; - f) - export TREE_SITTER_BENCHMARK_FILE_NAME=${OPTARG} + e) + export TREE_SITTER_BENCHMARK_EXAMPLE_FILTER=${OPTARG} ;; l) - export TREE_SITTER_BENCHMARK_LANGUAGE=${OPTARG} - ;; - L) - export TREE_SITTER_BENCHMARK_LOG=1 - ;; - b) - run_scan_build=true + export TREE_SITTER_BENCHMARK_LANGUAGE_FILTER=${OPTARG} ;; esac done -if [[ -n "$run_scan_build" ]]; then - . 
script/util/scan-build.sh - scan_build make -j2 benchmarks -else - make -j2 benchmarks -fi - -case $mode in - debug) - lldb $cmd - ;; - - normal) - exec $cmd - ;; -esac +cargo bench diff --git a/script/benchmark.cmd b/script/benchmark.cmd new file mode 100644 index 00000000..f5608d9d --- /dev/null +++ b/script/benchmark.cmd @@ -0,0 +1,3 @@ +@echo off + +cargo bench From 91da7206b7d10f423042846e7bbabf439191eba9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Feb 2019 15:54:34 -0800 Subject: [PATCH 194/208] Clean up environment variables after windows batch scripts --- script/fetch-fixtures.cmd | 14 +++++++------- script/regenerate-fixtures.cmd | 10 +++++----- script/test.cmd | 3 ++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 98d5d578..011d73ff 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -12,18 +12,18 @@ call:fetch_grammar python master call:fetch_grammar ruby master call:fetch_grammar rust master call:fetch_grammar typescript master -EXIT /B 0 +exit /B 0 :fetch_grammar -SETLOCAL -SET grammar_dir=test\fixtures\grammars\%~1 -SET grammar_url=https://github.com/tree-sitter/tree-sitter-%~1 -SET grammar_branch=%~2 -@IF NOT EXIST %grammar_dir% ( +setlocal +set grammar_dir=test\fixtures\grammars\%~1 +set grammar_url=https://github.com/tree-sitter/tree-sitter-%~1 +set grammar_branch=%~2 +@if not exist %grammar_dir% ( git clone %grammar_url% %grammar_dir% --depth=1 ) pushd %grammar_dir% git fetch origin %2 --depth=1 git reset --hard FETCH_HEAD popd -EXIT /B 0 +exit /B 0 diff --git a/script/regenerate-fixtures.cmd b/script/regenerate-fixtures.cmd index 739bdba1..b307409e 100644 --- a/script/regenerate-fixtures.cmd +++ b/script/regenerate-fixtures.cmd @@ -10,13 +10,13 @@ call:regenerate javascript call:regenerate json call:regenerate python call:regenerate rust -EXIT /B 0 +exit /B 0 :regenerate -SETLOCAL -SET tree_sitter=%cd%\target\release\tree-sitter -SET 
grammar_dir=test\fixtures\grammars\%~1 +setlocal +set tree_sitter=%cd%\target\release\tree-sitter +set grammar_dir=test\fixtures\grammars\%~1 pushd %grammar_dir% %tree_sitter% generate src\grammar.json popd -EXIT /B 0 +exit /B 0 diff --git a/script/test.cmd b/script/test.cmd index ef4ce02e..d1b462e8 100644 --- a/script/test.cmd +++ b/script/test.cmd @@ -1,7 +1,8 @@ @echo off +setlocal set TREE_SITTER_TEST=1 set RUST_TEST_THREADS=1 set RUST_BACKTRACE=full - cargo test "%~1" -- --nocapture +endlocal From e143710f4aad9ec1b5b493876b5456db942b0b88 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Feb 2019 19:57:00 -0800 Subject: [PATCH 195/208] Move `properties` module out of `generate` --- cli/src/generate/mod.rs | 49 +++++++++++++--------------- cli/src/lib.rs | 1 + cli/src/main.rs | 20 +++++++----- cli/src/{generate => }/properties.rs | 0 4 files changed, 34 insertions(+), 36 deletions(-) rename cli/src/{generate => }/properties.rs (100%) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 397fd677..127e956e 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -16,7 +16,6 @@ mod nfa; mod npm_files; mod parse_grammar; mod prepare_grammar; -mod properties; mod render; mod rules; mod tables; @@ -33,33 +32,29 @@ pub fn generate_parser_in_directory( grammar_path: Option<&str>, minimize: bool, state_ids_to_log: Vec, - properties_only: bool, ) -> Result<()> { - if !properties_only { - let grammar_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); - let grammar_json = load_grammar_file(&grammar_path)?; - let (language_name, c_code) = - generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; - let repo_src_path = repo_path.join("src"); - let repo_header_path = repo_src_path.join("tree_sitter"); - fs::create_dir_all(&repo_src_path)?; - fs::create_dir_all(&repo_header_path)?; - fs::write(&repo_src_path.join("parser.c"), c_code) - .map_err(|e| format!("Failed to write parser.c: {}", e))?; - 
ensure_file(&repo_src_path.join("binding.cc"), || { - npm_files::binding_cc(&language_name) - })?; - ensure_file(&repo_path.join("binding.gyp"), || { - npm_files::binding_gyp(&language_name) - })?; - ensure_file(&repo_path.join("index.js"), || { - npm_files::index_js(&language_name) - })?; - ensure_file(&repo_header_path.join("parser.h"), || { - include_str!("../../../lib/include/tree_sitter/parser.h") - })?; - } - properties::generate_property_sheets(repo_path)?; + let grammar_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); + let grammar_json = load_grammar_file(&grammar_path)?; + let (language_name, c_code) = + generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; + let repo_src_path = repo_path.join("src"); + let repo_header_path = repo_src_path.join("tree_sitter"); + fs::create_dir_all(&repo_src_path)?; + fs::create_dir_all(&repo_header_path)?; + fs::write(&repo_src_path.join("parser.c"), c_code) + .map_err(|e| format!("Failed to write parser.c: {}", e))?; + ensure_file(&repo_src_path.join("binding.cc"), || { + npm_files::binding_cc(&language_name) + })?; + ensure_file(&repo_path.join("binding.gyp"), || { + npm_files::binding_gyp(&language_name) + })?; + ensure_file(&repo_path.join("index.js"), || { + npm_files::index_js(&language_name) + })?; + ensure_file(&repo_header_path.join("parser.h"), || { + include_str!("../../../lib/include/tree_sitter/parser.h") + })?; Ok(()) } diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 9038b5b8..3a15b457 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -3,6 +3,7 @@ pub mod generate; pub mod loader; pub mod logger; pub mod parse; +pub mod properties; pub mod test; pub mod util; diff --git a/cli/src/main.rs b/cli/src/main.rs index 3c0b057e..4d4dc1c6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,9 +3,9 @@ use std::env; use std::fs; use std::path::Path; use std::process::exit; -use tree_sitter_cli::loader::Loader; -use tree_sitter_cli::{error, generate, logger, 
parse, test}; use std::usize; +use tree_sitter_cli::loader::Loader; +use tree_sitter_cli::{error, generate, logger, parse, properties, test}; fn main() { if let Err(e) = run() { @@ -87,13 +87,15 @@ fn run() -> error::Result<()> { ids.filter_map(|id| usize::from_str_radix(id, 10).ok()) .collect() }); - generate::generate_parser_in_directory( - ¤t_dir, - grammar_path, - minimize, - state_ids_to_log, - properties_only, - )?; + if !properties_only { + generate::generate_parser_in_directory( + ¤t_dir, + grammar_path, + minimize, + state_ids_to_log, + )?; + } + properties::generate_property_sheets(¤t_dir)?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); diff --git a/cli/src/generate/properties.rs b/cli/src/properties.rs similarity index 100% rename from cli/src/generate/properties.rs rename to cli/src/properties.rs From 6ca1047bb0a067371cc2b7fc492d99c424adf44c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Feb 2019 20:19:38 -0800 Subject: [PATCH 196/208] Remove ci shell script --- script/ci | 9 --------- 1 file changed, 9 deletions(-) delete mode 100755 script/ci diff --git a/script/ci b/script/ci deleted file mode 100755 index 6ad8a2b7..00000000 --- a/script/ci +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -e - -script/fetch-fixtures -script/check-mallocs -script/build-runtime -script/test -b -script/benchmark -b From f263a4fbe335404c6f79048187b57f6184587602 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 1 Feb 2019 21:20:27 -0800 Subject: [PATCH 197/208] Separate walk_with_properties tests from parser tests --- cli/src/main.rs | 2 +- cli/src/properties.rs | 7 +- cli/src/tests/mod.rs | 3 +- .../{parser_api_test.rs => parser_test.rs} | 199 +----------------- cli/src/tests/properties_test.rs | 134 ++++++++++++ 5 files changed, 145 insertions(+), 200 deletions(-) rename cli/src/tests/{parser_api_test.rs => parser_test.rs} (58%) 
create mode 100644 cli/src/tests/properties_test.rs diff --git a/cli/src/main.rs b/cli/src/main.rs index 4d4dc1c6..299ab896 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -95,7 +95,7 @@ fn run() -> error::Result<()> { state_ids_to_log, )?; } - properties::generate_property_sheets(¤t_dir)?; + properties::generate_property_sheets_in_directory(¤t_dir)?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); diff --git a/cli/src/properties.rs b/cli/src/properties.rs index f5861159..fccfd7ed 100644 --- a/cli/src/properties.rs +++ b/cli/src/properties.rs @@ -421,7 +421,7 @@ impl fmt::Debug for Selector { } } -pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { +pub fn generate_property_sheets_in_directory(repo_path: &Path) -> Result<()> { let src_dir_path = repo_path.join("src"); let properties_dir_path = repo_path.join("properties"); @@ -443,6 +443,11 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> { Ok(()) } +pub fn generate_property_sheet_string(path: impl AsRef, css: &str) -> Result { + let sheet = generate_property_sheet(path, css)?; + Ok(serde_json::to_string(&sheet)?) 
+} + fn generate_property_sheet(path: impl AsRef, css: &str) -> Result { let rules = parse_property_sheet(path.as_ref(), &css)?; Ok(Builder::new(rules).build()) diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 3641cc3e..b8f6ad1f 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,4 +1,5 @@ mod corpus_test; mod helpers; -mod parser_api_test; +mod parser_test; +mod properties_test; mod tree_test; diff --git a/cli/src/tests/parser_api_test.rs b/cli/src/tests/parser_test.rs similarity index 58% rename from cli/src/tests/parser_api_test.rs rename to cli/src/tests/parser_test.rs index e46d9b55..a061d8c6 100644 --- a/cli/src/tests/parser_api_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,7 +1,6 @@ use super::helpers::fixtures::get_language; -use serde_derive::Deserialize; use std::thread; -use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet}; +use tree_sitter::{InputEdit, Language, LogType, Parser, Point}; #[test] fn test_basic_parsing() { @@ -93,200 +92,6 @@ fn test_tree_cursor() { assert_eq!(cursor.node().is_named(), true); } -#[test] -fn test_tree_property_matching() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let source_code = "fn f1() { f2(); }"; - let tree = parser.parse_str(source_code, None).unwrap(); - - #[derive(Debug, Deserialize, PartialEq, Eq)] - struct Properties { - reference: Option, - define: Option, - } - - let empty_properties = Properties { - reference: None, - define: None, - }; - - let property_sheet = PropertySheet::::new( - rust(), - r##" - { - "states": [ - { - "transitions": [ - {"type": "call_expression", "named": true, "state_id": 1}, - {"type": "function_item", "named": true, "state_id": 2} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [ - {"type": "identifier", "named": true, "state_id": 3} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [ - {"type": "identifier", "named": true, 
"state_id": 4} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 1 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 2 - } - ], - "property_sets": [ - {}, - {"reference": "function"}, - {"define": "function"} - ] - } - "##, - ) - .unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code); - assert_eq!(cursor.node().kind(), "source_file"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "function_item"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "fn"); - assert_eq!(*cursor.node_properties(), empty_properties); - assert!(!cursor.goto_first_child()); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!(cursor.node_properties().define, Some("function".to_owned())); - assert!(!cursor.goto_first_child()); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "parameters"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "("); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), ")"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_parent()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "block"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - cursor.node_properties().reference, - Some("function".to_owned()) 
- ); -} - -#[test] -fn test_tree_property_matching_with_regexes() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let source_code = "fn f1() { None(a()) }"; - let tree = parser.parse_str(source_code, None).unwrap(); - - #[derive(Debug, Deserialize, PartialEq, Eq)] - struct Properties { - scope: Option, - } - - let empty_properties = Properties { scope: None }; - - let property_sheet = PropertySheet::::new( - rust(), - r##" - { - "states": [ - { - "id": 0, - "transitions": [ - {"type": "call_expression", "named": true, "state_id": 1} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "id": 1, - "transitions": [ - {"type": "identifier", "named": true, "text": "^[A-Z]", "state_id": 2}, - {"type": "identifier", "named": true, "state_id": 3} - ], - "default_next_state_id": 0, - "property_set_id": 0 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 1 - }, - { - "transitions": [], - "default_next_state_id": 0, - "property_set_id": 2 - } - ], - "property_sets": [ - {}, - {"scope": "constructor"}, - {"scope": "function"} - ] - } - "##, - ) - .unwrap(); - - let mut cursor = tree.walk_with_properties(&property_sheet, source_code); - assert_eq!(cursor.node().kind(), "source_file"); - assert_eq!(*cursor.node_properties(), empty_properties); - - cursor.goto_first_child(); - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert!(cursor.goto_next_sibling()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "block"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "call_expression"); - assert_eq!(*cursor.node_properties(), empty_properties); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "identifier"); - assert_eq!( - cursor.node_properties().scope, - Some("constructor".to_owned()) - ); -} - #[test] 
fn test_custom_utf8_input() { let mut parser = Parser::new(); @@ -454,7 +259,7 @@ fn test_editing() { fn test_parallel_parsing() { // Parse this source file so that each thread has a non-trivial amount of // work to do. - let this_file_source = include_str!("parser_api_test.rs"); + let this_file_source = include_str!("parser_test.rs"); let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); diff --git a/cli/src/tests/properties_test.rs b/cli/src/tests/properties_test.rs new file mode 100644 index 00000000..213eb9d0 --- /dev/null +++ b/cli/src/tests/properties_test.rs @@ -0,0 +1,134 @@ +use super::helpers::fixtures::get_language; +use crate::properties; +use serde_derive::Deserialize; +use tree_sitter::{Parser, PropertySheet}; + +#[derive(Debug, Default, Deserialize, PartialEq, Eq)] +struct Properties { + a: Option, + b: Option, +} + +#[test] +fn test_walk_with_properties_with_nth_child() { + let language = get_language("javascript"); + let property_sheet = PropertySheet::::new( + language, + &properties::generate_property_sheet_string( + "/some/path.css", + " + binary_expression > identifier:nth-child(2) { + a: x; + } + + binary_expression > identifier { + a: y; + } + + identifier { + a: z; + } + ", + ) + .unwrap(), + ) + .unwrap(); + + let source_code = "a = b || c;"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse_str(source_code, None).unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "program"); + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "expression_statement"); + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "assignment_expression"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(*cursor.node_properties(), Properties { a: Some("z".to_string()), b: None }); + + assert!(cursor.goto_next_sibling()); + 
assert_eq!(cursor.node().kind(), "="); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "binary_expression"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(*cursor.node_properties(), Properties { a: Some("y".to_string()), b: None }); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "||"); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(*cursor.node_properties(), Properties { a: Some("x".to_string()), b: None }); +} + +#[test] +fn test_walk_with_properties_with_regexes() { + let language = get_language("javascript"); + let property_sheet = PropertySheet::::new( + language, + &properties::generate_property_sheet_string( + "/some/path.css", + " + identifier { + &[text='^[A-Z]'] { + a: y; + } + + &[text='^[A-Z_]+$'] { + a: z; + } + + a: x; + } + ", + ) + .unwrap(), + ) + .unwrap(); + + let source_code = "const ABC = Def(ghi);"; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse_str(source_code, None).unwrap(); + + let mut cursor = tree.walk_with_properties(&property_sheet, source_code); + assert_eq!(cursor.node().kind(), "program"); + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "lexical_declaration"); + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "const"); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "variable_declarator"); + + // The later selector with a text regex overrides the earlier one. 
+ assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(*cursor.node_properties(), Properties { a: Some("z".to_string()), b: None }); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "="); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "call_expression"); + + // The selectors with text regexes override the selector without one. + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(*cursor.node_properties(), Properties { a: Some("y".to_string()), b: None }); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "arguments"); + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "("); + + // This node doesn't match either of the regexes. + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "identifier"); + assert_eq!(*cursor.node_properties(), Properties { a: Some("x".to_string()), b: None }); +} From d465850aba1a3ffca2499ea2bb4f628218886bb9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 2 Feb 2019 14:00:11 -0800 Subject: [PATCH 198/208] Add unit tests for ts_tree_get_changed_ranges --- cli/src/tests/corpus_test.rs | 95 +--------------- cli/src/tests/helpers/edits.rs | 94 ++++++++++++++++ cli/src/tests/helpers/mod.rs | 1 + cli/src/tests/parser_test.rs | 58 +--------- cli/src/tests/tree_test.rs | 198 ++++++++++++++++++++++++++++++++- 5 files changed, 295 insertions(+), 151 deletions(-) create mode 100644 cli/src/tests/helpers/edits.rs diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 1ee3ddc1..f1990963 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -1,4 +1,5 @@ use super::helpers::allocations; +use super::helpers::edits::{get_random_edit, invert_edit, perform_edit}; use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language}; use super::helpers::random::Rand; use 
super::helpers::scope_sequence::ScopeSequence; @@ -7,7 +8,7 @@ use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; use lazy_static::lazy_static; use std::{env, fs, time, usize}; -use tree_sitter::{InputEdit, LogType, Node, Parser, Point, Tree}; +use tree_sitter::{LogType, Node, Parser, Tree}; const EDIT_COUNT: usize = 3; const TRIAL_COUNT: usize = 10; @@ -187,12 +188,6 @@ fn test_real_language_corpus_files() { } } -struct Edit { - position: usize, - deleted_length: usize, - inserted_text: Vec, -} - #[test] fn test_feature_corpus_files() { let test_grammars_dir = fixtures_dir().join("test_grammars"); @@ -279,92 +274,6 @@ fn test_feature_corpus_files() { } } -fn get_random_edit(rand: &mut Rand, input: &Vec) -> Edit { - let choice = rand.unsigned(10); - if choice < 2 { - // Insert text at end - let inserted_text = rand.words(3); - Edit { - position: input.len(), - deleted_length: 0, - inserted_text, - } - } else if choice < 5 { - // Delete text from the end - let mut deleted_length = rand.unsigned(10); - if deleted_length > input.len() { - deleted_length = input.len(); - } - Edit { - position: input.len() - deleted_length, - deleted_length, - inserted_text: vec![], - } - } else if choice < 8 { - // Insert at a random position - let position = rand.unsigned(input.len()); - let word_count = 1 + rand.unsigned(3); - let inserted_text = rand.words(word_count); - Edit { - position, - deleted_length: 0, - inserted_text, - } - } else { - // Replace at random position - let position = rand.unsigned(input.len()); - let deleted_length = rand.unsigned(input.len() - position); - let word_count = 1 + rand.unsigned(3); - let inserted_text = rand.words(word_count); - Edit { - position, - deleted_length, - inserted_text, - } - } -} - -fn invert_edit(input: &Vec, edit: &Edit) -> Edit { - let position = edit.position; - let removed_content = &input[position..(position + edit.deleted_length)]; - Edit { - position, - deleted_length: 
edit.inserted_text.len(), - inserted_text: removed_content.to_vec(), - } -} - -fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { - let start_byte = edit.position; - let old_end_byte = edit.position + edit.deleted_length; - let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); - input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); - tree.edit(&InputEdit { - start_byte, - old_end_byte, - new_end_byte, - start_position, - old_end_position, - new_end_position, - }); -} - -fn position_for_offset(input: &Vec, offset: usize) -> Point { - let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } - } - result -} - fn check_consistent_sizes(tree: &Tree, input: &Vec) { fn check(node: Node, line_offsets: &Vec) { let start_byte = node.start_byte(); diff --git a/cli/src/tests/helpers/edits.rs b/cli/src/tests/helpers/edits.rs new file mode 100644 index 00000000..4e4d0c25 --- /dev/null +++ b/cli/src/tests/helpers/edits.rs @@ -0,0 +1,94 @@ +use super::random::Rand; +use tree_sitter::{InputEdit, Point, Tree}; + +pub struct Edit { + pub position: usize, + pub deleted_length: usize, + pub inserted_text: Vec, +} + +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { + let start_byte = edit.position; + let old_end_byte = edit.position + edit.deleted_length; + let new_end_byte = edit.position + edit.inserted_text.len(); + let start_position = position_for_offset(input, start_byte); + let old_end_position = position_for_offset(input, old_end_byte); + input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); + let new_end_position = position_for_offset(input, new_end_byte); + tree.edit(&InputEdit { 
+ start_byte, + old_end_byte, + new_end_byte, + start_position, + old_end_position, + new_end_position, + }); +} + +pub fn invert_edit(input: &Vec, edit: &Edit) -> Edit { + let position = edit.position; + let removed_content = &input[position..(position + edit.deleted_length)]; + Edit { + position, + deleted_length: edit.inserted_text.len(), + inserted_text: removed_content.to_vec(), + } +} + +pub fn get_random_edit(rand: &mut Rand, input: &Vec) -> Edit { + let choice = rand.unsigned(10); + if choice < 2 { + // Insert text at end + let inserted_text = rand.words(3); + Edit { + position: input.len(), + deleted_length: 0, + inserted_text, + } + } else if choice < 5 { + // Delete text from the end + let mut deleted_length = rand.unsigned(10); + if deleted_length > input.len() { + deleted_length = input.len(); + } + Edit { + position: input.len() - deleted_length, + deleted_length, + inserted_text: vec![], + } + } else if choice < 8 { + // Insert at a random position + let position = rand.unsigned(input.len()); + let word_count = 1 + rand.unsigned(3); + let inserted_text = rand.words(word_count); + Edit { + position, + deleted_length: 0, + inserted_text, + } + } else { + // Replace at random position + let position = rand.unsigned(input.len()); + let deleted_length = rand.unsigned(input.len() - position); + let word_count = 1 + rand.unsigned(3); + let inserted_text = rand.words(word_count); + Edit { + position, + deleted_length, + inserted_text, + } + } +} + +fn position_for_offset(input: &Vec, offset: usize) -> Point { + let mut result = Point { row: 0, column: 0 }; + for c in &input[0..offset] { + if *c as char == '\n' { + result.row += 1; + result.column = 0; + } else { + result.column += 1; + } + } + result +} diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index bd5c6517..2d1ce574 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -2,3 +2,4 @@ pub(super) mod allocations; pub(super) mod fixtures; pub(super) mod 
random; pub(super) mod scope_sequence; +pub(super) mod edits; diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index a061d8c6..43fbbc1b 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -30,7 +30,7 @@ fn test_basic_parsing() { } #[test] -fn test_logging() { +fn test_parsing_with_logging() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -57,43 +57,7 @@ fn test_logging() { } #[test] -fn test_tree_cursor() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let tree = parser - .parse_str( - " - struct Stuff { - a: A; - b: Option, - } - ", - None, - ) - .unwrap(); - - let mut cursor = tree.walk(); - assert_eq!(cursor.node().kind(), "source_file"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "struct_item"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "struct"); - assert_eq!(cursor.node().is_named(), false); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "type_identifier"); - assert_eq!(cursor.node().is_named(), true); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "field_declaration_list"); - assert_eq!(cursor.node().is_named(), true); -} - -#[test] -fn test_custom_utf8_input() { +fn test_parsing_with_custom_utf8_input() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -126,7 +90,7 @@ fn test_custom_utf8_input() { } #[test] -fn test_custom_utf16_input() { +fn test_parsing_with_custom_utf16_input() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -162,19 +126,7 @@ fn test_custom_utf16_input() { } #[test] -fn test_node_equality() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let tree = parser.parse_str("struct A {}", None).unwrap(); - let node1 = tree.root_node(); - let node2 = tree.root_node(); - assert_eq!(node1, node2); - assert_eq!(node1.child(0).unwrap(), 
node2.child(0).unwrap()); - assert_ne!(node1.child(0).unwrap(), node2); -} - -#[test] -fn test_editing() { +fn test_parsing_after_editing() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -256,7 +208,7 @@ fn test_editing() { } #[test] -fn test_parallel_parsing() { +fn test_parsing_on_multiple_threads() { // Parse this source file so that each thread has a non-trivial amount of // work to do. let this_file_source = include_str!("parser_test.rs"); diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index 401ff03a..d3a16cba 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -1,10 +1,12 @@ +use super::helpers::edits::{invert_edit, perform_edit, Edit}; use super::helpers::fixtures::get_language; -use tree_sitter::{InputEdit, Language, Parser, Point}; +use std::str; +use tree_sitter::{InputEdit, Parser, Point, Range, Tree}; #[test] -fn test_edit() { +fn test_tree_edit() { let mut parser = Parser::new(); - parser.set_language(javascript()).unwrap(); + parser.set_language(get_language("javascript")).unwrap(); let tree = parser.parse_str(" abc !== def", None).unwrap(); assert_eq!( @@ -186,6 +188,192 @@ fn test_edit() { } } -fn javascript() -> Language { - get_language("javascript") +#[test] +fn test_tree_walk() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let tree = parser + .parse_str( + " + struct Stuff { + a: A; + b: Option, + } + ", + None, + ) + .unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct_item"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct"); + assert_eq!(cursor.node().is_named(), false); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_next_sibling()); + 
assert_eq!(cursor.node().kind(), "field_declaration_list"); + assert_eq!(cursor.node().is_named(), true); +} + +#[test] +fn test_tree_node_equality() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + let tree = parser.parse_str("struct A {}", None).unwrap(); + let node1 = tree.root_node(); + let node2 = tree.root_node(); + assert_eq!(node1, node2); + assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); + assert_ne!(node1.child(0).unwrap(), node2); +} + +#[test] +fn test_get_changed_ranges() { + let source_code = b"{a: null};\n".to_vec(); + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser + .parse_utf8(&mut |i, _| &source_code[i..], None) + .unwrap(); + + assert_eq!( + tree.root_node().to_sexp(), + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + + // Updating one token + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Replace `null` with `nothing` - that token has changed syntax + let edit = Edit { + position: index_of(&source_code, "ull"), + deleted_length: 3, + inserted_text: b"othing".to_vec(), + }; + let inverse_edit = invert_edit(&source_code, &edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + assert_eq!(ranges, vec![range_of(&source_code, "nothing")]); + + // Replace `nothing` with `null` - that token has changed syntax + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + assert_eq!(ranges, vec![range_of(&source_code, "null")]); + } + + // Changing only leading whitespace + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Insert leading newline - no changed ranges + let edit = Edit { + position: 0, + deleted_length: 0, + inserted_text: b"\n".to_vec(), + }; + let inverse_edit = invert_edit(&source_code, &edit); + let ranges = get_changed_ranges(&mut parser, 
&mut tree, &mut source_code, edit); + assert_eq!(ranges, vec![]); + + // Remove leading newline - no changed ranges + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + assert_eq!(ranges, vec![]); + } + + // Inserting elements + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Insert a key-value pair before the `}` - those tokens are changed + let edit1 = Edit { + position: index_of(&source_code, "}"), + deleted_length: 0, + inserted_text: b", b: false".to_vec(), + }; + let inverse_edit1 = invert_edit(&source_code, &edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]); + + let edit2 = Edit { + position: index_of(&source_code, ", b"), + deleted_length: 0, + inserted_text: b", c: 1".to_vec(), + }; + let inverse_edit2 = invert_edit(&source_code, &edit2); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit2); + assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]); + + // Remove the middle pair + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit2); + assert_eq!(ranges, vec![]); + + // Remove the second pair + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1); + assert_eq!(ranges, vec![]); + } + + // Wrapping elements in larger expressions + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Replace `null` with the binary expression `b === null` + let edit1 = Edit { + position: index_of(&source_code, "null"), + deleted_length: 0, + inserted_text: b"b === ".to_vec(), + }; + let inverse_edit1 = invert_edit(&source_code, &edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + assert_eq!(ranges, vec![range_of(&source_code, "b === null")]); + + // Undo + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut 
source_code, inverse_edit1); + assert_eq!(ranges, vec![range_of(&source_code, "null")]); + } +} + +fn index_of(text: &Vec, substring: &str) -> usize { + str::from_utf8(text.as_slice()) + .unwrap() + .find(substring) + .unwrap() +} + +fn range_of(text: &Vec, substring: &str) -> Range { + let start_byte = index_of(text, substring); + let end_byte = start_byte + substring.as_bytes().len(); + Range { + start_byte, + end_byte, + start_point: Point::new(0, start_byte), + end_point: Point::new(0, end_byte), + } +} + +fn get_changed_ranges( + parser: &mut Parser, + tree: &mut Tree, + source_code: &mut Vec, + edit: Edit, +) -> Vec { + perform_edit(tree, source_code, &edit); + let new_tree = parser + .parse_utf8(&mut |i, _| &source_code[i..], Some(tree)) + .unwrap(); + let result = tree.changed_ranges(&new_tree); + *tree = new_tree; + result } From 6b8483c53c9ac765fbf1114ff902a946b9353f4c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 2 Feb 2019 21:37:54 -0800 Subject: [PATCH 199/208] Start work on porting included range unit tests --- cli/src/tests/parser_test.rs | 144 ++++++++++++++++++++++++++++++++++- lib/binding/lib.rs | 26 ++++++- 2 files changed, 168 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 43fbbc1b..8a11d22a 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,6 +1,6 @@ use super::helpers::fixtures::get_language; use std::thread; -use tree_sitter::{InputEdit, Language, LogType, Parser, Point}; +use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Range}; #[test] fn test_basic_parsing() { @@ -260,6 +260,148 @@ fn test_parsing_on_multiple_threads() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } +// Included Ranges + +#[test] +fn test_parsing_with_one_included_range() { + let source_code = "hi"; + + let mut parser = Parser::new(); + parser.set_language(get_language("html")).unwrap(); + let html_tree = parser.parse_str(source_code, None).unwrap(); 
+ let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap(); + assert_eq!(script_content_node.kind(), "raw_text"); + + parser.set_included_ranges(&[script_content_node.range()]); + parser.set_language(get_language("javascript")).unwrap(); + let js_tree = parser.parse_str(source_code, None).unwrap(); + + assert_eq!( + js_tree.root_node().to_sexp(), + concat!( + "(program (expression_statement (call_expression", + " (member_expression (identifier) (property_identifier))", + " (arguments (string)))))", + ) + ); + assert_eq!( + js_tree.root_node().start_position(), + Point::new(0, source_code.find("console").unwrap()) + ); +} + +#[test] +fn test_parsing_with_multiple_included_ranges() { + let source_code = "html `

Hello, ${name.toUpperCase()}, it's ${now()}.
`"; + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let js_tree = parser.parse_str(source_code, None).unwrap(); + let template_string_node = js_tree + .root_node() + .descendant_for_byte_range( + source_code.find("
").unwrap(), + source_code.find("Hello").unwrap(), + ) + .unwrap(); + assert_eq!(template_string_node.kind(), "template_string"); + + let open_quote_node = template_string_node.child(0).unwrap(); + let interpolation_node1 = template_string_node.child(1).unwrap(); + let interpolation_node2 = template_string_node.child(2).unwrap(); + let close_quote_node = template_string_node.child(3).unwrap(); + + parser.set_language(get_language("html")).unwrap(); + parser.set_included_ranges(&[ + Range { + start_byte: open_quote_node.end_byte(), + start_point: open_quote_node.end_position(), + end_byte: interpolation_node1.start_byte(), + end_point: interpolation_node1.start_position(), + }, + Range { + start_byte: interpolation_node1.end_byte(), + start_point: interpolation_node1.end_position(), + end_byte: interpolation_node2.start_byte(), + end_point: interpolation_node2.start_position(), + }, + Range { + start_byte: interpolation_node2.end_byte(), + start_point: interpolation_node2.end_position(), + end_byte: close_quote_node.start_byte(), + end_point: close_quote_node.start_position(), + }, + ]); + let html_tree = parser.parse_str(source_code, None).unwrap(); + + assert_eq!( + html_tree.root_node().to_sexp(), + concat!( + "(fragment (element", + " (start_tag (tag_name))", + " (text)", + " (element (start_tag (tag_name)) (end_tag (tag_name)))", + " (text)", + " (end_tag (tag_name))))", + ) + ); + + let div_element_node = html_tree.root_node().child(0).unwrap(); + let hello_text_node = div_element_node.child(1).unwrap(); + let b_element_node = div_element_node.child(2).unwrap(); + let b_start_tag_node = b_element_node.child(0).unwrap(); + let b_end_tag_node = b_element_node.child(1).unwrap(); + + assert_eq!(hello_text_node.kind(), "text"); + assert_eq!( + hello_text_node.start_byte(), + source_code.find("Hello").unwrap() + ); + assert_eq!(hello_text_node.end_byte(), source_code.find("").unwrap()); + + assert_eq!(b_start_tag_node.kind(), "start_tag"); + assert_eq!( + 
b_start_tag_node.start_byte(), + source_code.find("").unwrap() + ); + assert_eq!( + b_start_tag_node.end_byte(), + source_code.find("${now()}").unwrap() + ); + + assert_eq!(b_end_tag_node.kind(), "end_tag"); + assert_eq!( + b_end_tag_node.start_byte(), + source_code.find("").unwrap() + ); + assert_eq!( + b_end_tag_node.end_byte(), + source_code.find(".
").unwrap() + ); +} + +#[test] +fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { + let source_code = ""; + let utf16_source_code: Vec = source_code.as_bytes().iter().map(|c| *c as u16).collect(); + + let start_byte = 2 * source_code.find("a.").unwrap(); + let end_byte = 2 * source_code.find("").unwrap(); + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + parser.set_included_ranges(&[Range { + start_byte, + end_byte, + start_point: Point::new(0, start_byte), + end_point: Point::new(0, end_byte), + }]); + let tree = parser + .parse_utf16(&mut |i, _| &utf16_source_code[i..], None) + .unwrap(); + assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))"); +} + fn rust() -> Language { get_language("rust") } diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 150dfcf4..9e04ed35 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -261,7 +261,7 @@ impl Parser { ) -> Option { self.parse_utf16_ptr( &mut |byte, position| { - let slice = input(byte, position); + let slice = input(byte / 2, position); (slice.as_ptr(), slice.len()) }, old_tree, @@ -570,6 +570,30 @@ impl<'tree> Node<'tree> { Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) } + pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option { + Self::new(unsafe { + ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32) + }) + } + + pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option { + Self::new(unsafe { + ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32) + }) + } + + pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option { + Self::new(unsafe { + ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into()) + }) + } + + pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option { + Self::new(unsafe { + ffi::ts_node_named_descendant_for_point_range(self.0, 
start.into(), end.into()) + }) + } + pub fn to_sexp(&self) -> String { let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) } From b5c057ba0420deb69a162f07bff13d60e83a3125 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 3 Feb 2019 13:59:27 -0800 Subject: [PATCH 200/208] 0.14.0-beta4 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package-lock.json | 2 +- cli/npm/package.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c2dcd62..407d1189 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -572,7 +572,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.14.0-beta3" +version = "0.14.0-beta4" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 35b6c7a0..c63209db 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tree-sitter-cli" -version = "0.14.0-beta3" +version = "0.14.0-beta4" authors = ["Max Brunsfeld "] edition = "2018" diff --git a/cli/npm/package-lock.json b/cli/npm/package-lock.json index 4590ac72..fa5b766f 100644 --- a/cli/npm/package-lock.json +++ b/cli/npm/package-lock.json @@ -1,5 +1,5 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta3", + "version": "0.14.0-beta4", "lockfileVersion": 1 } diff --git a/cli/npm/package.json b/cli/npm/package.json index 276ea9d8..5dec57d7 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.14.0-beta3", + "version": "0.14.0-beta4", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 59f7511b1c9e9ae269278f05e4b0843a3d086922 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 4 Feb 2019 09:12:25 -0800 Subject: [PATCH 201/208] Fix test command's exit code --- cli/src/main.rs | 4 +--- cli/src/test.rs | 8 ++++---- 2 files 
changed, 5 insertions(+), 7 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 299ab896..3fb1890d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -102,9 +102,7 @@ fn run() -> error::Result<()> { let filter = matches.value_of("filter"); let corpus_path = current_dir.join("corpus"); if let Some(language) = loader.language_at_path(¤t_dir)? { - if !test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)? { - exit(1); - } + test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)?; } else { eprintln!("No language found"); } diff --git a/cli/src/test.rs b/cli/src/test.rs index c8330af9..7a2fab25 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,4 +1,4 @@ -use super::error::Result; +use super::error::{Error, Result}; use super::util; use ansi_term::Colour; use difference::{Changeset, Difference}; @@ -52,7 +52,7 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, -) -> Result { +) -> Result<()> { let test_entry = parse_tests(path)?; let mut _log_session = None; let mut parser = Parser::new(); @@ -90,9 +90,9 @@ pub fn run_tests_at_path( println!("\n {}. 
{}:", i + 1, name); print_diff(actual, expected); } - Ok(true) + Err(Error(String::new())) } else { - Ok(false) + Ok(()) } } From e62a8a2302104e5b2bdfc194e54bb7859684ab22 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 4 Feb 2019 10:38:44 -0800 Subject: [PATCH 202/208] Port more parser unit tests --- cli/src/tests/parser_test.rs | 268 +++++++++++++++++++++++++++++++++-- 1 file changed, 257 insertions(+), 11 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 8a11d22a..94694a32 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,11 +1,11 @@ use super::helpers::fixtures::get_language; -use std::thread; -use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Range}; +use std::{thread, usize}; +use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; #[test] fn test_basic_parsing() { let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); let tree = parser .parse_str( @@ -32,7 +32,7 @@ fn test_basic_parsing() { #[test] fn test_parsing_with_logging() { let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); let mut messages = Vec::new(); parser.set_logger(Some(Box::new(|log_type, message| { @@ -59,7 +59,7 @@ fn test_parsing_with_logging() { #[test] fn test_parsing_with_custom_utf8_input() { let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); let lines = &["pub fn foo() {", " 1", "}"]; @@ -92,7 +92,7 @@ fn test_parsing_with_custom_utf8_input() { #[test] fn test_parsing_with_custom_utf16_input() { let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); let lines: Vec> = ["pub fn foo() {", " 1", "}"] .iter() @@ -128,7 +128,7 @@ fn test_parsing_with_custom_utf16_input() { #[test] fn 
test_parsing_after_editing() { let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); let mut input_bytes_read = Vec::new(); @@ -214,7 +214,7 @@ fn test_parsing_on_multiple_threads() { let this_file_source = include_str!("parser_test.rs"); let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); let tree = parser.parse_str(this_file_source, None).unwrap(); let mut parse_threads = Vec::new(); @@ -242,7 +242,7 @@ fn test_parsing_on_multiple_threads() { // Reparse using the old tree as a starting point. let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); + parser.set_language(get_language("rust")).unwrap(); parser .parse_str(&prepended_source, Some(&tree_clone)) .unwrap() @@ -260,6 +260,76 @@ fn test_parsing_on_multiple_threads() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } +// Operation limits + +#[test] +fn test_parsing_with_an_operation_limit() { + let mut parser = Parser::new(); + parser.set_language(get_language("json")).unwrap(); + + // Start parsing from an infinite input. Parsing should abort after 5 "operations". + parser.set_operation_limit(5); + let mut call_count = 0; + let tree = parser.parse_utf8(&mut |_, _| { + if call_count == 0 { + call_count += 1; + b"[0" + } else { + call_count += 1; + b", 0" + } + }, None); + assert!(tree.is_none()); + assert!(call_count >= 3); + assert!(call_count <= 8); + + // Resume parsing from the previous state. 
+ call_count = 0; + parser.set_operation_limit(20); + let tree = parser.parse_utf8(&mut |_, _| { + if call_count == 0 { + call_count += 1; + b"]" + } else { + b"" + } + }, None).unwrap(); + assert_eq!(tree.root_node().to_sexp(), "(value (array (number) (number) (number)))"); +} + +#[test] +fn test_parsing_with_a_reset_after_reaching_an_operation_limit() { + let mut parser = Parser::new(); + parser.set_language(get_language("json")).unwrap(); + + parser.set_operation_limit(3); + let tree = parser.parse_str("[1234, 5, 6, 7, 8]", None); + assert!(tree.is_none()); + + // Without calling reset, the parser continues from where it left off, so + // it does not see the changes to the beginning of the source code. + parser.set_operation_limit(usize::MAX); + let tree = parser.parse_str("[null, 5, 6, 4, 5]", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(value (array (number) (number) (number) (number) (number)))" + ); + + parser.set_operation_limit(3); + let tree = parser.parse_str("[1234, 5, 6, 7, 8]", None); + assert!(tree.is_none()); + + // By calling reset, we force the parser to start over from scratch so + // that it sees the changes to the beginning of the source code. 
+ parser.set_operation_limit(usize::MAX); + parser.reset(); + let tree = parser.parse_str("[null, 5, 6, 4, 5]", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(value (array (null) (number) (number) (number) (number)))" + ); +} + // Included Ranges #[test] @@ -402,6 +472,182 @@ fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))"); } -fn rust() -> Language { - get_language("rust") +#[test] +fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() { + let source_code = "a <%= b() %> c <% d() %>"; + let range1_start_byte = source_code.find(" b() ").unwrap(); + let range1_end_byte = range1_start_byte + " b() ".len(); + let range2_start_byte = source_code.find(" d() ").unwrap(); + let range2_end_byte = range2_start_byte + " d() ".len(); + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + parser.set_included_ranges(&[ + Range { + start_byte: range1_start_byte, + end_byte: range1_end_byte, + start_point: Point::new(0, range1_start_byte), + end_point: Point::new(0, range1_end_byte), + }, + Range { + start_byte: range2_start_byte, + end_byte: range2_end_byte, + start_point: Point::new(0, range2_start_byte), + end_point: Point::new(0, range2_end_byte), + }, + ]); + + let tree = parser.parse_str(source_code, None).unwrap(); + let root = tree.root_node(); + let statement1 = root.child(0).unwrap(); + let statement2 = root.child(1).unwrap(); + + assert_eq!( + root.to_sexp(), + concat!( + "(program", + " (expression_statement (call_expression (identifier) (arguments)))", + " (expression_statement (call_expression (identifier) (arguments))))" + ) + ); + + assert_eq!(statement1.start_byte(), source_code.find("b()").unwrap()); + assert_eq!(statement1.end_byte(), source_code.find(" %> c").unwrap()); + assert_eq!(statement2.start_byte(), source_code.find("d()").unwrap()); + assert_eq!(statement2.end_byte(), 
source_code.len() - " %>".len()); +} + +#[test] +fn test_parsing_with_a_newly_excluded_range() { + let mut source_code = String::from("
<%= something %>
"); + + // Parse HTML including the template directive, which will cause an error + let mut parser = Parser::new(); + parser.set_language(get_language("html")).unwrap(); + let mut first_tree = parser.parse_str(&source_code, None).unwrap(); + + // Insert code at the beginning of the document. + let prefix = "a very very long line of plain text. "; + first_tree.edit(&InputEdit { + start_byte: 0, + old_end_byte: 0, + new_end_byte: prefix.len(), + start_position: Point::new(0, 0), + old_end_position: Point::new(0, 0), + new_end_position: Point::new(0, prefix.len()), + }); + source_code.insert_str(0, prefix); + + // Parse the HTML again, this time *excluding* the template directive + // (which has moved since the previous parse). + let directive_start = source_code.find("<%=").unwrap(); + let directive_end = source_code.find("").unwrap(); + let source_code_end = source_code.len(); + parser.set_included_ranges(&[ + Range { + start_byte: 0, + end_byte: directive_start, + start_point: Point::new(0, 0), + end_point: Point::new(0, directive_start), + }, + Range { + start_byte: directive_end, + end_byte: source_code_end, + start_point: Point::new(0, directive_end), + end_point: Point::new(0, source_code_end), + }, + ]); + let tree = parser.parse_str(&source_code, Some(&first_tree)).unwrap(); + + assert_eq!( + tree.root_node().to_sexp(), + concat!( + "(fragment (text) (element", + " (start_tag (tag_name))", + " (element (start_tag (tag_name)) (end_tag (tag_name)))", + " (end_tag (tag_name))))" + ) + ); + + assert_eq!( + tree.changed_ranges(&first_tree), + vec![ + // The first range that has changed syntax is the range of the newly-inserted text. + Range { + start_byte: 0, + end_byte: prefix.len(), + start_point: Point::new(0, 0), + end_point: Point::new(0, prefix.len()), + }, + // Even though no edits were applied to the outer `div` element, + // its contents have changed syntax because a range of text that + // was previously included is now excluded. 
+ Range { + start_byte: directive_start, + end_byte: directive_end, + start_point: Point::new(0, directive_start), + end_point: Point::new(0, directive_end), + }, + ] + ); +} + +#[test] +fn test_parsing_with_a_newly_included_range() { + let source_code = "
<%= foo() %>
<%= bar() %>"; + let first_code_start_index = source_code.find(" foo").unwrap(); + let first_code_end_index = first_code_start_index + 7; + let second_code_start_index = source_code.find(" bar").unwrap(); + let second_code_end_index = second_code_start_index + 7; + let ranges = [ + Range { + start_byte: first_code_start_index, + end_byte: first_code_end_index, + start_point: Point::new(0, first_code_start_index), + end_point: Point::new(0, first_code_end_index), + }, + Range { + start_byte: second_code_start_index, + end_byte: second_code_end_index, + start_point: Point::new(0, second_code_start_index), + end_point: Point::new(0, second_code_end_index), + }, + ]; + + // Parse only the first code directive as JavaScript + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + parser.set_included_ranges(&ranges[0..1]); + let first_tree = parser.parse_str(source_code, None).unwrap(); + assert_eq!( + first_tree.root_node().to_sexp(), + concat!( + "(program", + " (expression_statement (call_expression (identifier) (arguments))))", + ) + ); + + // Parse both the code directives as JavaScript, using the old tree as a reference. 
+ parser.set_included_ranges(&ranges); + let tree = parser.parse_str(&source_code, Some(&first_tree)).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + concat!( + "(program", + " (expression_statement (call_expression (identifier) (arguments)))", + " (expression_statement (call_expression (identifier) (arguments))))", + ) + ); + + assert_eq!( + tree.changed_ranges(&first_tree), + vec![ + Range { + start_byte: first_code_end_index + 1, + end_byte: second_code_end_index + 1, + start_point: Point::new(0, first_code_end_index + 1), + end_point: Point::new(0, second_code_end_index + 1), + } + ] + ); } From 4a98f0b87ef0eeb965ad1e2f55ed3d7e7ca45e0a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 4 Feb 2019 14:44:06 -0800 Subject: [PATCH 203/208] Port unit test for missing tokens and included ranges --- cli/src/generate/mod.rs | 2 +- cli/src/tests/corpus_test.rs | 2 +- cli/src/tests/helpers/fixtures.rs | 16 ++-- cli/src/tests/parser_test.rs | 123 +++++++++++++++++++++++------- 4 files changed, 107 insertions(+), 36 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 127e956e..b00379af 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -58,7 +58,7 @@ pub fn generate_parser_in_directory( Ok(()) } -pub fn generate_parser_for_grammar(grammar_json: &String) -> Result<(String, String)> { +pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index f1990963..c2e8b6c8 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -239,7 +239,7 @@ fn test_feature_corpus_files() { } else { let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap().1; - let language = get_test_language(language_name, c_code, &test_path); + let language 
= get_test_language(language_name, &c_code, Some(&test_path)); let test = parse_tests(&corpus_path).unwrap(); let tests = flatten_tests(test); diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 981f0ab6..a5ea9ed0 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -20,7 +20,7 @@ pub fn get_language(name: &str) -> Language { .unwrap() } -pub fn get_test_language(name: &str, parser_code: String, path: &Path) -> Language { +pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name)); if !fs::read_to_string(&parser_c_path) .map(|content| content == parser_code) @@ -28,12 +28,14 @@ pub fn get_test_language(name: &str, parser_code: String, path: &Path) -> Langua { fs::write(&parser_c_path, parser_code).unwrap(); } - let scanner_path = path.join("scanner.c"); - let scanner_path = if scanner_path.exists() { - Some(scanner_path) - } else { - None - }; + let scanner_path = path.and_then(|p| { + let result = p.join("scanner.c"); + if result.exists() { + Some(result) + } else { + None + } + }); TEST_LOADER .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path) .unwrap() diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 94694a32..6790d37f 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,4 +1,5 @@ -use super::helpers::fixtures::get_language; +use super::helpers::fixtures::{get_language, get_test_language}; +use crate::generate::generate_parser_for_grammar; use std::{thread, usize}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; @@ -270,15 +271,18 @@ fn test_parsing_with_an_operation_limit() { // Start parsing from an infinite input. Parsing should abort after 5 "operations". 
parser.set_operation_limit(5); let mut call_count = 0; - let tree = parser.parse_utf8(&mut |_, _| { - if call_count == 0 { - call_count += 1; - b"[0" - } else { - call_count += 1; - b", 0" - } - }, None); + let tree = parser.parse_utf8( + &mut |_, _| { + if call_count == 0 { + call_count += 1; + b"[0" + } else { + call_count += 1; + b", 0" + } + }, + None, + ); assert!(tree.is_none()); assert!(call_count >= 3); assert!(call_count <= 8); @@ -286,15 +290,23 @@ fn test_parsing_with_an_operation_limit() { // Resume parsing from the previous state. call_count = 0; parser.set_operation_limit(20); - let tree = parser.parse_utf8(&mut |_, _| { - if call_count == 0 { - call_count += 1; - b"]" - } else { - b"" - } - }, None).unwrap(); - assert_eq!(tree.root_node().to_sexp(), "(value (array (number) (number) (number)))"); + let tree = parser + .parse_utf8( + &mut |_, _| { + if call_count == 0 { + call_count += 1; + b"]" + } else { + b"" + } + }, + None, + ) + .unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(value (array (number) (number) (number)))" + ); } #[test] @@ -641,13 +653,70 @@ fn test_parsing_with_a_newly_included_range() { assert_eq!( tree.changed_ranges(&first_tree), - vec![ - Range { - start_byte: first_code_end_index + 1, - end_byte: second_code_end_index + 1, - start_point: Point::new(0, first_code_end_index + 1), - end_point: Point::new(0, second_code_end_index + 1), - } - ] + vec![Range { + start_byte: first_code_end_index + 1, + end_byte: second_code_end_index + 1, + start_point: Point::new(0, first_code_end_index + 1), + end_point: Point::new(0, second_code_end_index + 1), + }] ); } + +#[test] +fn test_parsing_with_included_ranges_and_missing_tokens() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#"{ + "name": "test_leading_missing_token", + "rules": { + "program": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "A"}, + {"type": "SYMBOL", "name": "b"}, + {"type": "SYMBOL", "name": "c"}, + {"type": "SYMBOL", 
"name": "A"}, + {"type": "SYMBOL", "name": "b"}, + {"type": "SYMBOL", "name": "c"} + ] + }, + "A": {"type": "SYMBOL", "name": "a"}, + "a": {"type": "STRING", "value": "a"}, + "b": {"type": "STRING", "value": "b"}, + "c": {"type": "STRING", "value": "c"} + } + }"#, + ) + .unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + + // There's a missing `a` token at the beginning of the code. It must be inserted + // at the beginning of the first included range, not at {0, 0}. + let source_code = "__bc__bc__"; + parser.set_included_ranges(&[ + Range { + start_byte: 2, + end_byte: 4, + start_point: Point::new(0, 2), + end_point: Point::new(0, 4), + }, + Range { + start_byte: 6, + end_byte: 8, + start_point: Point::new(0, 6), + end_point: Point::new(0, 8), + }, + ]); + + let tree = parser.parse_str(source_code, None).unwrap(); + let root = tree.root_node(); + assert_eq!( + root.to_sexp(), + "(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))" + ); + assert_eq!(root.start_byte(), 2); + assert_eq!(root.child(3).unwrap().start_byte(), 4); +} From 9a8cf39277c2a6a3f39112f00a453775a65d8f00 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 4 Feb 2019 16:43:21 -0800 Subject: [PATCH 204/208] Add incremental parsing unit tests --- cli/src/tests/helpers/edits.rs | 49 +++++++++++ cli/src/tests/parser_test.rs | 146 ++++++++++++++++++--------------- 2 files changed, 127 insertions(+), 68 deletions(-) diff --git a/cli/src/tests/helpers/edits.rs b/cli/src/tests/helpers/edits.rs index 4e4d0c25..d4eba7d9 100644 --- a/cli/src/tests/helpers/edits.rs +++ b/cli/src/tests/helpers/edits.rs @@ -1,4 +1,6 @@ use super::random::Rand; +use std::ops::Range; +use std::str; use tree_sitter::{InputEdit, Point, Tree}; pub struct Edit { @@ -7,6 +9,53 @@ pub struct Edit { pub inserted_text: Vec, } +#[derive(Debug)] +pub struct ReadRecorder<'a> { + content: &'a Vec, + indices_read: Vec, +} + +impl<'a> ReadRecorder<'a> 
{ + pub fn new(content: &'a Vec) -> Self { + Self { + content, + indices_read: Vec::new(), + } + } + + pub fn read(&mut self, offset: usize) -> &'a [u8] { + if offset < self.content.len() { + if let Err(i) = self.indices_read.binary_search(&offset) { + self.indices_read.insert(i, offset); + } + &self.content[offset..(offset + 1)] + } else { + &[] + } + } + + pub fn strings_read(&self) -> Vec<&'a str> { + let mut result = Vec::new(); + let mut last_range: Option> = None; + for index in self.indices_read.iter() { + if let Some(ref mut range) = &mut last_range { + if range.end == *index { + range.end += 1; + } else { + result.push(str::from_utf8(&self.content[range.clone()]).unwrap()); + last_range = None; + } + } else { + last_range = Some(*index..(*index + 1)); + } + } + if let Some(range) = last_range { + result.push(str::from_utf8(&self.content[range.clone()]).unwrap()); + } + result + } +} + pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 6790d37f..32554e7f 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,3 +1,4 @@ +use super::helpers::edits::{perform_edit, Edit, ReadRecorder}; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; use std::{thread, usize}; @@ -126,88 +127,97 @@ fn test_parsing_with_custom_utf16_input() { assert_eq!(root.child(0).unwrap().kind(), "function_item"); } +// Incremental parsing + #[test] -fn test_parsing_after_editing() { +fn test_parsing_after_editing_beginning_of_code() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(get_language("javascript")).unwrap(); - let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes(); - let mut input_bytes_read = Vec::new(); - - let mut tree = parser - .parse_utf8( 
- &mut |offset, _| { - let offset = offset; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, - None, - ) - .unwrap(); - - let parameters_sexp = tree - .root_node() - .named_child(0) - .unwrap() - .named_child(1) - .unwrap() - .to_sexp(); + let mut code = b"123 + 456 * (10 + x);".to_vec(); + let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap(); assert_eq!( - parameters_sexp, - "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + tree.root_node().to_sexp(), + concat!( + "(program (expression_statement (binary_expression ", + "(number) ", + "(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier)))))))", + ) ); - input_bytes_read.clear(); - input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes(); - tree.edit(&InputEdit { - start_byte: 14, - old_end_byte: 14, - new_end_byte: 20, - start_position: Point::new(0, 14), - old_end_position: Point::new(0, 14), - new_end_position: Point::new(0, 20), - }); + perform_edit( + &mut tree, + &mut code, + &Edit { + position: 3, + deleted_length: 0, + inserted_text: b" || 5".to_vec(), + }, + ); + let mut recorder = ReadRecorder::new(&code); let tree = parser - .parse_utf8( - &mut |offset, _| { - let offset = offset; - if offset < input_bytes.len() { - let result = &input_bytes[offset..offset + 1]; - input_bytes_read.extend(result.iter()); - result - } else { - &[] - } - }, - Some(&tree), - ) + .parse_utf8(&mut |i, _| recorder.read(i), Some(&tree)) .unwrap(); - - let parameters_sexp = tree - .root_node() - .named_child(0) - .unwrap() - .named_child(1) - .unwrap() - .to_sexp(); assert_eq!( - parameters_sexp, - "(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))" + tree.root_node().to_sexp(), + concat!( + "(program 
(expression_statement (binary_expression ", + "(number) ", + "(binary_expression ", + "(number) ", + "(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier))))))))", + ) ); - let retokenized_content = String::from_utf8(input_bytes_read).unwrap(); - assert!(retokenized_content.contains("b: B")); - assert!(!retokenized_content.contains("a: A")); - assert!(!retokenized_content.contains("c: C")); - assert!(!retokenized_content.contains("{}")); + assert_eq!(recorder.strings_read(), vec!["123 || 5 "]); } +#[test] +fn test_parsing_after_editing_end_of_code() { + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + + let mut code = b"x * (100 + abc);".to_vec(); + let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + concat!( + "(program (expression_statement (binary_expression ", + "(identifier) ", + "(parenthesized_expression (binary_expression (number) (identifier))))))", + ) + ); + + let position = code.len() - 2; + perform_edit( + &mut tree, + &mut code, + &Edit { + position, + deleted_length: 0, + inserted_text: b".d".to_vec(), + }, + ); + + let mut recorder = ReadRecorder::new(&code); + let tree = parser + .parse_utf8(&mut |i, _| recorder.read(i), Some(&tree)) + .unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + concat!( + "(program (expression_statement (binary_expression ", + "(identifier) ", + "(parenthesized_expression (binary_expression (number) (member_expression (identifier) (property_identifier)))))))" + ) + ); + + assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]); +} + +// Thread safety + #[test] fn test_parsing_on_multiple_threads() { // Parse this source file so that each thread has a non-trivial amount of From efe79889be94623325c0d32ed4912766066a0d9a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 4 Feb 2019 20:42:56 -0800 Subject: [PATCH 205/208] Port node tests --- 
cli/src/tests/helpers/edits.rs | 8 +- cli/src/tests/mod.rs | 1 + cli/src/tests/node_test.rs | 364 +++++++++++++++++++++++++++++++++ lib/binding/lib.rs | 27 ++- 4 files changed, 389 insertions(+), 11 deletions(-) create mode 100644 cli/src/tests/node_test.rs diff --git a/cli/src/tests/helpers/edits.rs b/cli/src/tests/helpers/edits.rs index d4eba7d9..e84477c4 100644 --- a/cli/src/tests/helpers/edits.rs +++ b/cli/src/tests/helpers/edits.rs @@ -56,7 +56,7 @@ impl<'a> ReadRecorder<'a> { } } -pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputEdit { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; let new_end_byte = edit.position + edit.inserted_text.len(); @@ -64,14 +64,16 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { let old_end_position = position_for_offset(input, old_end_byte); input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); let new_end_position = position_for_offset(input, new_end_byte); - tree.edit(&InputEdit { + let edit = InputEdit { start_byte, old_end_byte, new_end_byte, start_position, old_end_position, new_end_position, - }); + }; + tree.edit(&edit); + edit } pub fn invert_edit(input: &Vec, edit: &Edit) -> Edit { diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index b8f6ad1f..af2b4582 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,5 +1,6 @@ mod corpus_test; mod helpers; +mod node_test; mod parser_test; mod properties_test; mod tree_test; diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs new file mode 100644 index 00000000..fc6038f4 --- /dev/null +++ b/cli/src/tests/node_test.rs @@ -0,0 +1,364 @@ +use super::helpers::fixtures::{get_language, get_test_language}; +use super::helpers::random::Rand; +use super::helpers::edits::{get_random_edit, perform_edit}; +use crate::generate::generate_parser_for_grammar; +use 
tree_sitter::{Node, Parser, Point, Tree}; + +const JSON_EXAMPLE: &'static str = r#" + +[ + 123, + false, + { + "x": null + } +] +"#; + +const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &'static str = r#"{ + "name": "aliases_and_extras", + + "extras": [ + {"type": "PATTERN", "value": "\\s+"}, + {"type": "SYMBOL", "name": "comment"} + ], + + "rules": { + "a": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "b"}, + { + "type": "ALIAS", + "value": "B", + "named": true, + "content": {"type": "SYMBOL", "name": "b"} + }, + { + "type": "ALIAS", + "value": "C", + "named": true, + "content": {"type": "SYMBOL", "name": "_c"} + } + ] + }, + + "b": {"type": "STRING", "value": "b"}, + + "_c": {"type": "STRING", "value": "c"}, + + "comment": {"type": "STRING", "value": "..."} + } +}"#; + +#[test] +fn test_node_child() { + let tree = parse_json_example(); + let array_node = tree.root_node().child(0).unwrap(); + + assert_eq!(array_node.kind(), "array"); + assert_eq!(array_node.named_child_count(), 3); + assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find("[").unwrap()); + assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find("]").unwrap() + 1); + assert_eq!(array_node.start_position(), Point::new(2, 0)); + assert_eq!(array_node.end_position(), Point::new(8, 1)); + assert_eq!(array_node.child_count(), 7); + + let left_bracket_node = array_node.child(0).unwrap(); + let number_node = array_node.child(1).unwrap(); + let comma_node1 = array_node.child(2).unwrap(); + let false_node = array_node.child(3).unwrap(); + let comma_node2 = array_node.child(4).unwrap(); + let object_node = array_node.child(5).unwrap(); + let right_bracket_node = array_node.child(6).unwrap(); + + assert_eq!(left_bracket_node.kind(), "["); + assert_eq!(number_node.kind(), "number"); + assert_eq!(comma_node1.kind(), ","); + assert_eq!(false_node.kind(), "false"); + assert_eq!(comma_node2.kind(), ","); + assert_eq!(object_node.kind(), "object"); + assert_eq!(right_bracket_node.kind(), "]"); + + 
assert_eq!(left_bracket_node.is_named(), false); + assert_eq!(number_node.is_named(), true); + assert_eq!(comma_node1.is_named(), false); + assert_eq!(false_node.is_named(), true); + assert_eq!(comma_node2.is_named(), false); + assert_eq!(object_node.is_named(), true); + assert_eq!(right_bracket_node.is_named(), false); + + assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap()); + assert_eq!( + number_node.end_byte(), + JSON_EXAMPLE.find("123").unwrap() + 3 + ); + assert_eq!(number_node.start_position(), Point::new(3, 2)); + assert_eq!(number_node.end_position(), Point::new(3, 5)); + + assert_eq!(false_node.start_byte(), JSON_EXAMPLE.find("false").unwrap()); + assert_eq!( + false_node.end_byte(), + JSON_EXAMPLE.find("false").unwrap() + 5 + ); + assert_eq!(false_node.start_position(), Point::new(4, 2)); + assert_eq!(false_node.end_position(), Point::new(4, 7)); + + assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap()); + assert_eq!(object_node.start_position(), Point::new(5, 2)); + assert_eq!(object_node.end_position(), Point::new(7, 3)); + + assert_eq!(object_node.child_count(), 3); + let left_brace_node = object_node.child(0).unwrap(); + let pair_node = object_node.child(1).unwrap(); + let right_brace_node = object_node.child(2).unwrap(); + + assert_eq!(left_brace_node.kind(), "{"); + assert_eq!(pair_node.kind(), "pair"); + assert_eq!(right_brace_node.kind(), "}"); + + assert_eq!(left_brace_node.is_named(), false); + assert_eq!(pair_node.is_named(), true); + assert_eq!(right_brace_node.is_named(), false); + + assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap()); + assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); + assert_eq!(pair_node.start_position(), Point::new(6, 4)); + assert_eq!(pair_node.end_position(), Point::new(6, 13)); + + assert_eq!(pair_node.child_count(), 3); + let string_node = pair_node.child(0).unwrap(); + let colon_node = pair_node.child(1).unwrap(); + let null_node = 
pair_node.child(2).unwrap(); + + assert_eq!(string_node.kind(), "string"); + assert_eq!(colon_node.kind(), ":"); + assert_eq!(null_node.kind(), "null"); + + assert_eq!(string_node.is_named(), true); + assert_eq!(colon_node.is_named(), false); + assert_eq!(null_node.is_named(), true); + + assert_eq!( + string_node.start_byte(), + JSON_EXAMPLE.find("\"x\"").unwrap() + ); + assert_eq!( + string_node.end_byte(), + JSON_EXAMPLE.find("\"x\"").unwrap() + 3 + ); + assert_eq!(string_node.start_position(), Point::new(6, 4)); + assert_eq!(string_node.end_position(), Point::new(6, 7)); + + assert_eq!(null_node.start_byte(), JSON_EXAMPLE.find("null").unwrap()); + assert_eq!(null_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); + assert_eq!(null_node.start_position(), Point::new(6, 9)); + assert_eq!(null_node.end_position(), Point::new(6, 13)); + + assert_eq!(string_node.parent().unwrap(), pair_node); + assert_eq!(null_node.parent().unwrap(), pair_node); + assert_eq!(pair_node.parent().unwrap(), object_node); + assert_eq!(number_node.parent().unwrap(), array_node); + assert_eq!(false_node.parent().unwrap(), array_node); + assert_eq!(object_node.parent().unwrap(), array_node); + assert_eq!(array_node.parent().unwrap(), tree.root_node()); + assert_eq!(tree.root_node().parent(), None); +} + +#[test] +fn test_node_named_child() { + let tree = parse_json_example(); + let array_node = tree.root_node().child(0).unwrap(); + + let number_node = array_node.named_child(0).unwrap(); + let false_node = array_node.named_child(1).unwrap(); + let object_node = array_node.named_child(2).unwrap(); + + assert_eq!(number_node.kind(), "number"); + assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap()); + assert_eq!( + number_node.end_byte(), + JSON_EXAMPLE.find("123").unwrap() + 3 + ); + assert_eq!(number_node.start_position(), Point::new(3, 2)); + assert_eq!(number_node.end_position(), Point::new(3, 5)); + + assert_eq!(false_node.kind(), "false"); + 
assert_eq!(false_node.start_byte(), JSON_EXAMPLE.find("false").unwrap()); + assert_eq!( + false_node.end_byte(), + JSON_EXAMPLE.find("false").unwrap() + 5 + ); + assert_eq!(false_node.start_position(), Point::new(4, 2)); + assert_eq!(false_node.end_position(), Point::new(4, 7)); + + assert_eq!(object_node.kind(), "object"); + assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap()); + assert_eq!(object_node.start_position(), Point::new(5, 2)); + assert_eq!(object_node.end_position(), Point::new(7, 3)); + + assert_eq!(object_node.named_child_count(), 1); + + let pair_node = object_node.named_child(0).unwrap(); + assert_eq!(pair_node.kind(), "pair"); + assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap()); + assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); + assert_eq!(pair_node.start_position(), Point::new(6, 4)); + assert_eq!(pair_node.end_position(), Point::new(6, 13)); + + let string_node = pair_node.named_child(0).unwrap(); + let null_node = pair_node.named_child(1).unwrap(); + + assert_eq!(string_node.kind(), "string"); + assert_eq!(null_node.kind(), "null"); + + assert_eq!( + string_node.start_byte(), + JSON_EXAMPLE.find("\"x\"").unwrap() + ); + assert_eq!( + string_node.end_byte(), + JSON_EXAMPLE.find("\"x\"").unwrap() + 3 + ); + assert_eq!(string_node.start_position(), Point::new(6, 4)); + assert_eq!(string_node.end_position(), Point::new(6, 7)); + + assert_eq!(null_node.start_byte(), JSON_EXAMPLE.find("null").unwrap()); + assert_eq!(null_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); + assert_eq!(null_node.start_position(), Point::new(6, 9)); + assert_eq!(null_node.end_position(), Point::new(6, 13)); + + assert_eq!(string_node.parent().unwrap(), pair_node); + assert_eq!(null_node.parent().unwrap(), pair_node); + assert_eq!(pair_node.parent().unwrap(), object_node); + assert_eq!(number_node.parent().unwrap(), array_node); + assert_eq!(false_node.parent().unwrap(), array_node); + 
assert_eq!(object_node.parent().unwrap(), array_node); + assert_eq!(array_node.parent().unwrap(), tree.root_node()); + assert_eq!(tree.root_node().parent(), None); +} + +#[test] +fn test_node_named_child_with_aliases_and_extras() { + let (parser_name, parser_code) = + generate_parser_for_grammar(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + + let tree = parser.parse_str("b ... b ... c", None).unwrap(); + let root = tree.root_node(); + assert_eq!(root.to_sexp(), "(a (b) (comment) (B) (comment) (C))"); + assert_eq!(root.named_child_count(), 5); + assert_eq!(root.named_child(0).unwrap().kind(), "b"); + assert_eq!(root.named_child(1).unwrap().kind(), "comment"); + assert_eq!(root.named_child(2).unwrap().kind(), "B"); + assert_eq!(root.named_child(3).unwrap().kind(), "comment"); + assert_eq!(root.named_child(4).unwrap().kind(), "C"); +} + +#[test] +fn test_node_descendant_for_range() { + let tree = parse_json_example(); + let array_node = tree.root_node().child(0).unwrap(); + + let colon_index = JSON_EXAMPLE.find(":").unwrap(); + let node1 = array_node + .descendant_for_byte_range(colon_index, colon_index) + .unwrap(); + assert_eq!(node1.kind(), ":"); + assert_eq!(node1.start_byte(), colon_index); + assert_eq!(node1.end_byte(), colon_index + 1); + assert_eq!(node1.start_position(), Point::new(6, 7)); + assert_eq!(node1.end_position(), Point::new(6, 8)); + + let string_index = JSON_EXAMPLE.find("\"x\"").unwrap(); + let node2 = array_node + .descendant_for_byte_range(string_index + 2, string_index + 4) + .unwrap(); + assert_eq!(node2.kind(), "pair"); + assert_eq!(node2.start_byte(), string_index); + assert_eq!(node2.end_byte(), string_index + 9); + assert_eq!(node2.start_position(), Point::new(6, 4)); + assert_eq!(node2.end_position(), Point::new(6, 13)); + + assert_eq!(node1.parent(), Some(node2)); + + let node3 = array_node + 
.named_descendant_for_byte_range(string_index, string_index + 2) + .unwrap(); + assert_eq!(node3.kind(), "string"); + assert_eq!(node3.start_byte(), string_index); + assert_eq!(node3.end_byte(), string_index + 3); + + // no leaf spans the given range - return the smallest node that does span it. + let node4 = array_node + .named_descendant_for_byte_range(string_index, string_index + 3) + .unwrap(); + assert_eq!(node4.kind(), "pair"); + assert_eq!(node4.start_byte(), string_index); + assert_eq!(node4.end_byte(), string_index + 9); +} + +#[test] +fn test_node_edit() { + let mut code = JSON_EXAMPLE.as_bytes().to_vec(); + let mut tree = parse_json_example(); + let mut rand = Rand::new(0); + + for _ in 0..10 { + let mut nodes_before = get_all_nodes(&tree); + + let edit = get_random_edit(&mut rand, &mut code); + let mut tree2 = tree.clone(); + let edit = perform_edit(&mut tree2, &mut code, &edit); + for node in nodes_before.iter_mut() { + node.edit(&edit); + } + + let nodes_after = get_all_nodes(&tree2); + for (i, node) in nodes_before.into_iter().enumerate() { + assert_eq!( + ( + node.kind(), + node.start_byte(), + node.start_position() + ), + ( + nodes_after[i].kind(), + nodes_after[i].start_byte(), + nodes_after[i].start_position() + ), + ); + } + + tree = tree2; + } +} + +fn get_all_nodes(tree: &Tree) -> Vec { + let mut result = Vec::new(); + let mut visited_children = false; + let mut cursor = tree.walk(); + loop { + result.push(cursor.node()); + if !visited_children && cursor.goto_first_child() { + continue; + } else if cursor.goto_next_sibling() { + visited_children = false; + } else if cursor.goto_parent() { + visited_children = true; + } else { + break; + } + } + return result; +} + +fn parse_json_example() -> Tree { + let mut parser = Parser::new(); + parser.set_language(get_language("json")).unwrap(); + parser.parse_str(JSON_EXAMPLE, None).unwrap() +} diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 9e04ed35..26335a09 100644 --- a/lib/binding/lib.rs 
+++ b/lib/binding/lib.rs @@ -406,14 +406,7 @@ impl Tree { } pub fn edit(&mut self, edit: &InputEdit) { - let edit = ffi::TSInputEdit { - start_byte: edit.start_byte as u32, - old_end_byte: edit.old_end_byte as u32, - new_end_byte: edit.new_end_byte as u32, - start_point: edit.start_position.into(), - old_end_point: edit.old_end_position.into(), - new_end_point: edit.new_end_position.into(), - }; + let edit = edit.into(); unsafe { ffi::ts_tree_edit(self.0, &edit) }; } @@ -615,6 +608,11 @@ impl<'tree> Node<'tree> { pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } + + pub fn edit(&mut self, edit: &InputEdit) { + let edit = edit.into(); + unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) } + } } impl<'a> PartialEq for Node<'a> { @@ -832,6 +830,19 @@ impl From for Range { } } +impl<'a> Into for &'a InputEdit { + fn into(self) -> ffi::TSInputEdit { + ffi::TSInputEdit { + start_byte: self.start_byte as u32, + old_end_byte: self.old_end_byte as u32, + new_end_byte: self.new_end_byte as u32, + start_point: self.start_position.into(), + old_end_point: self.old_end_position.into(), + new_end_point: self.new_end_position.into(), + } + } +} + impl

PropertySheet

{ pub fn new(language: Language, json: &str) -> Result where From ce040e21e19ebf521a22c3e85d75383e66847a5c Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Sat, 2 Feb 2019 22:14:54 +0000 Subject: [PATCH 206/208] trace-pc-guard is unsupported in newer versions of clang --- script/build-fuzzers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/build-fuzzers b/script/build-fuzzers index 0a19bc4c..d48e1da1 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -15,7 +15,7 @@ CC=${CC:-clang} CXX=${CXX:-clang++} LINK=${LINK:-clang++} -default_fuzz_flags="-fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard" +default_fuzz_flags="-fsanitize=fuzzer,address,undefined" CFLAGS=${CFLAGS:-"$default_fuzz_flags"} CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"} From d102c473e8f17996659bf01a2be5b04a11f89653 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Sat, 2 Feb 2019 22:15:04 +0000 Subject: [PATCH 207/208] Remove invalid characters from grammar names --- script/build-fuzzers | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/script/build-fuzzers b/script/build-fuzzers index d48e1da1..2a44b10c 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -53,7 +53,11 @@ for lang in ${languages[@]}; do modes=(true halt false recover) for i in 0 2; do - $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="tree_sitter_$lang" \ + # FIXME: We should extract the grammar name from grammar.js. Use the name of + # the directory instead. 
Also, the grammar name needs to be a valid C + # identifier so replace any '-' characters + ts_lang="tree_sitter_$(echo $lang | tr -- - _)" + $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_HALT_ON_ERROR="${modes[i]}" -D TS_LANG="$ts_lang" \ "test/fuzz/fuzzer.cc" "${objects[@]}" \ libtree-sitter.a "$LIB_FUZZER_PATH" \ -o "out/${lang}_fuzzer_${modes[i+1]}" From 6df2adc8032ca672d898822e50681478a66b6697 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Sat, 2 Feb 2019 22:13:22 +0000 Subject: [PATCH 208/208] clang must be >= 7 --- test/fuzz/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fuzz/README.md b/test/fuzz/README.md index d5040b95..649d2d89 100644 --- a/test/fuzz/README.md +++ b/test/fuzz/README.md @@ -15,7 +15,7 @@ cd compiler-rt/lib/fuzzer ## clang -Using libFuzzer requires a reasonably new version of `clang` and will probably _not_ work with your system-installed version. The easiest way to get started is to use the version provided by the Chromium team. Instructions are available at [libFuzzer.info](http://libfuzzer.info). +Using libFuzzer requires at least version 7 of `clang` and may _not_ work with your system-installed version. If your system-installed version is too old, the easiest way to get started is to use the version provided by the Chromium team. Instructions are available at [libFuzzer.info](http://libfuzzer.info). The fuzzers can then be built with: ```