Merge branch 'master' into actions-ci

2021-02-05 10:19:05 -08:00 · 2021-02-05 10:19:05 -08:00 · 6dbe6a3a90
commit 6dbe6a3a90
parent 82dad76b98 f462f0d09d
27 changed files with 932 additions and 460 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -123,9 +123,9 @@ dependencies = [

 [[package]]
 name = "cc"
-version = "1.0.25"
+version = "1.0.66"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
+checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"

 [[package]]
 name = "cfg-if"
@ -272,6 +272,15 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"

+[[package]]
+name = "html-escape"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d348900ce941b7474395ba922ed3735a517df4546a2939ddb416ce85eeaa988e"
+dependencies = [
+ "utf8-width",
+]
+
 [[package]]
 name = "idna"
 version = "0.1.5"
@ -832,7 +841,7 @@ dependencies = [

 [[package]]
 name = "tree-sitter-cli"
-version = "0.17.3"
+version = "0.18.0"
 dependencies = [
 "ansi_term",
 "atty",
@ -841,6 +850,7 @@ dependencies = [
 "difference",
 "dirs",
 "glob",
+ "html-escape",
 "lazy_static",
 "libloading",
 "log",
@ -940,6 +950,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"

+[[package]]
+name = "utf8-width"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa"
+
 [[package]]
 name = "vec_map"
 version = "0.8.1"
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@ -1,7 +1,7 @@
 [package]
 name = "tree-sitter-cli"
 description = "CLI tool for developing, testing, and using Tree-sitter parsers"
-version = "0.17.3"
+version = "0.18.0"
 authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
 edition = "2018"
 license = "MIT"
@ -20,7 +20,7 @@ harness = false

 [dependencies]
 ansi_term = "0.11"
-cc = "1.0"
+cc = "^1.0.58"
 atty = "0.2"
 clap = "2.32"
 difference = "2.0"
@ -36,6 +36,7 @@ serde_derive = "1.0"
 smallbitvec = "2.3.0"
 tiny_http = "0.6"
 webbrowser = "0.5.1"
+html-escape = "0.2.6"

 [dependencies.tree-sitter]
 version = ">= 0.17.0"
--- a/cli/npm/package.json
+++ b/cli/npm/package.json
@ -1,6 +1,6 @@
 {
  "name": "tree-sitter-cli",
-  "version": "0.17.3",
+  "version": "0.18.0",
  "author": "Max Brunsfeld",
  "license": "MIT",
  "repository": {
--- a/cli/src/generate/build_tables/build_parse_table.rs
+++ b/cli/src/generate/build_tables/build_parse_table.rs
@ -577,7 +577,7 @@ impl<'a> ParseTableBuilder<'a> {
                        "(precedence: {}, associativity: {:?})",
                        precedence, associativity
                    ))
-                } else if precedence > 0 {
+                } else if precedence != 0 {
                    Some(format!("(precedence: {})", precedence))
                } else {
                    None
@ -619,6 +619,28 @@ impl<'a> ParseTableBuilder<'a> {
        }
        shift_items.sort_unstable();
        reduce_items.sort_unstable();
+
+        let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
+            let mut last_rule_id = None;
+            for item in items {
+                if last_rule_id == Some(item.variable_index) {
+                    continue;
+                }
+
+                if last_rule_id.is_some() {
+                    write!(&mut msg, " and").unwrap();
+                }
+
+                last_rule_id = Some(item.variable_index);
+                write!(
+                    msg,
+                    " `{}`",
+                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
+                )
+                .unwrap();
+            }
+        };
+
        if actual_conflict.len() > 1 {
            if shift_items.len() > 0 {
                resolution_count += 1;
@ -628,17 +650,7 @@ impl<'a> ParseTableBuilder<'a> {
                    resolution_count
                )
                .unwrap();
-                for (i, item) in shift_items.iter().enumerate() {
-                    if i > 0 {
-                        write!(&mut msg, " and").unwrap();
-                    }
-                    write!(
-                        &mut msg,
-                        " `{}`",
-                        self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
-                    )
-                    .unwrap();
-                }
+                list_rule_names(&mut msg, &shift_items);
                write!(&mut msg, " than in the other rules.\n").unwrap();
            }

@ -658,21 +670,11 @@ impl<'a> ParseTableBuilder<'a> {
            resolution_count += 1;
            write!(
                &mut msg,
-                "  {}:  Specify a left or right associativity in ",
+                "  {}:  Specify a left or right associativity in",
                resolution_count
            )
            .unwrap();
-            for (i, item) in reduce_items.iter().enumerate() {
-                if i > 0 {
-                    write!(&mut msg, " and ").unwrap();
-                }
-                write!(
-                    &mut msg,
-                    "`{}`",
-                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
-                )
-                .unwrap();
-            }
+            list_rule_names(&mut msg, &reduce_items);
            write!(&mut msg, "\n").unwrap();
        }

--- a/cli/src/generate/build_tables/mod.rs
+++ b/cli/src/generate/build_tables/mod.rs
@ -13,7 +13,7 @@ use self::minimize_parse_table::minimize_parse_table;
 use self::token_conflicts::TokenConflictMap;
 use crate::error::Result;
 use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
-use crate::generate::nfa::{CharacterSet, NfaCursor};
+use crate::generate::nfa::NfaCursor;
 use crate::generate::node_types::VariableInfo;
 use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
 use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
@ -472,10 +472,8 @@ fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
    cursor.transition_chars().all(|(chars, is_sep)| {
        if is_sep {
            true
-        } else if let CharacterSet::Include(chars) = chars {
-            chars.iter().all(|c| c.is_alphabetic() || *c == '_')
        } else {
-            false
+            chars.chars().all(|c| c.is_alphabetic() || c == '_')
        }
    })
 }
--- a/cli/src/generate/nfa.rs
+++ b/cli/src/generate/nfa.rs
@ -6,10 +6,9 @@ use std::fmt;
 use std::mem::swap;
 use std::ops::Range;

-#[derive(Clone, Debug, PartialEq, Eq, Hash)]
-pub enum CharacterSet {
-    Include(Vec<char>),
-    Exclude(Vec<char>),
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub struct CharacterSet {
+    ranges: Vec<Range<u32>>,
 }

 #[derive(Debug, PartialEq, Eq)]
@ -52,142 +51,233 @@ impl Default for Nfa {
    }
 }

+const END: u32 = char::MAX as u32 + 1;
+
 impl CharacterSet {
    pub fn empty() -> Self {
-        CharacterSet::Include(Vec::new())
+        CharacterSet { ranges: Vec::new() }
    }

-    pub fn negate(self) -> CharacterSet {
-        match self {
-            CharacterSet::Include(chars) => CharacterSet::Exclude(chars),
-            CharacterSet::Exclude(chars) => CharacterSet::Include(chars),
+    pub fn from_range(mut first: char, mut last: char) -> Self {
+        if first > last {
+            swap(&mut first, &mut last);
+        }
+        CharacterSet {
+            ranges: vec![(first as u32)..(last as u32 + 1)],
        }
    }

-    pub fn add_char(self, c: char) -> Self {
-        if let CharacterSet::Include(mut chars) = self {
-            if let Err(i) = chars.binary_search(&c) {
-                chars.insert(i, c);
+    pub fn from_char(c: char) -> Self {
+        CharacterSet {
+            ranges: vec![(c as u32)..(c as u32 + 1)],
+        }
+    }
+
+    pub fn negate(mut self) -> CharacterSet {
+        let mut i = 0;
+        let mut previous_end = 0;
+        while i < self.ranges.len() {
+            let range = &mut self.ranges[i];
+            let start = previous_end;
+            previous_end = range.end;
+            if start < range.start {
+                self.ranges[i] = start..range.start;
+                i += 1;
+            } else {
+                self.ranges.remove(i);
            }
-            CharacterSet::Include(chars)
-        } else {
-            panic!("Called add with a negated character set");
        }
+        if previous_end < END {
+            self.ranges.push(previous_end..END);
+        }
+        self
    }

-    pub fn add_range(self, start: char, end: char) -> Self {
-        if let CharacterSet::Include(mut chars) = self {
-            let mut c = start as u32;
-            while c <= end as u32 {
-                chars.push(char::from_u32(c).unwrap());
-                c += 1;
+    pub fn add_char(mut self, c: char) -> Self {
+        self.add_int_range(0, c as u32, c as u32 + 1);
+        self
+    }
+
+    pub fn add_range(mut self, start: char, end: char) -> Self {
+        self.add_int_range(0, start as u32, end as u32 + 1);
+        self
+    }
+
+    pub fn add(mut self, other: &CharacterSet) -> Self {
+        let mut index = 0;
+        for range in &other.ranges {
+            index = self.add_int_range(index, range.start as u32, range.end as u32);
+        }
+        self
+    }
+
+    fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize {
+        while i < self.ranges.len() {
+            let range = &mut self.ranges[i];
+            if range.start > end {
+                self.ranges.insert(i, start..end);
+                return i;
            }
-            chars.sort_unstable();
-            chars.dedup();
-            CharacterSet::Include(chars)
-        } else {
-            panic!("Called add with a negated character set");
-        }
-    }
-
-    pub fn add(self, other: &CharacterSet) -> Self {
-        match self {
-            CharacterSet::Include(mut chars) => match other {
-                CharacterSet::Include(other_chars) => {
-                    chars.extend(other_chars);
-                    chars.sort_unstable();
-                    chars.dedup();
-                    CharacterSet::Include(chars)
-                }
-                CharacterSet::Exclude(other_chars) => {
-                    let excluded_chars = other_chars
-                        .iter()
-                        .cloned()
-                        .filter(|c| !chars.contains(&c))
-                        .collect();
-                    CharacterSet::Exclude(excluded_chars)
-                }
-            },
-            CharacterSet::Exclude(mut chars) => match other {
-                CharacterSet::Include(other_chars) => {
-                    chars.retain(|c| !other_chars.contains(&c));
-                    CharacterSet::Exclude(chars)
-                }
-                CharacterSet::Exclude(other_chars) => {
-                    chars.retain(|c| other_chars.contains(&c));
-                    CharacterSet::Exclude(chars)
-                }
-            },
+            if range.end >= start {
+                range.end = range.end.max(end);
+                range.start = range.start.min(start);
+                return i;
+            }
+            i += 1;
        }
+        self.ranges.push(start..end);
+        i
    }

    pub fn does_intersect(&self, other: &CharacterSet) -> bool {
-        match self {
-            CharacterSet::Include(chars) => match other {
-                CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).common,
-                CharacterSet::Exclude(other_chars) => compare_chars(chars, other_chars).left_only,
-            },
-            CharacterSet::Exclude(chars) => match other {
-                CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).right_only,
-                CharacterSet::Exclude(_) => true,
-            },
+        let mut left_ranges = self.ranges.iter();
+        let mut right_ranges = other.ranges.iter();
+        let mut left_range = left_ranges.next();
+        let mut right_range = right_ranges.next();
+        while let (Some(left), Some(right)) = (&left_range, &right_range) {
+            if left.end <= right.start {
+                left_range = left_ranges.next();
+            } else if left.start >= right.end {
+                right_range = right_ranges.next();
+            } else {
+                return true;
+            }
        }
+        false
    }

    pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet {
-        match self {
-            CharacterSet::Include(chars) => match other {
-                CharacterSet::Include(other_chars) => {
-                    CharacterSet::Include(remove_chars(chars, other_chars, true))
+        let mut intersection = Vec::new();
+        let mut left_i = 0;
+        let mut right_i = 0;
+        while left_i < self.ranges.len() && right_i < other.ranges.len() {
+            let left = &mut self.ranges[left_i];
+            let right = &mut other.ranges[right_i];
+
+            match left.start.cmp(&right.start) {
+                Ordering::Less => {
+                    // [ L ]
+                    //     [ R ]
+                    if left.end <= right.start {
+                        left_i += 1;
+                        continue;
+                    }
+
+                    match left.end.cmp(&right.end) {
+                        // [ L ]
+                        //   [ R ]
+                        Ordering::Less => {
+                            intersection.push(right.start..left.end);
+                            swap(&mut left.end, &mut right.start);
+                            left_i += 1;
+                        }
+
+                        // [  L  ]
+                        //   [ R ]
+                        Ordering::Equal => {
+                            intersection.push(right.clone());
+                            left.end = right.start;
+                            other.ranges.remove(right_i);
+                        }
+
+                        // [   L   ]
+                        //   [ R ]
+                        Ordering::Greater => {
+                            intersection.push(right.clone());
+                            let new_range = left.start..right.start;
+                            left.start = right.end;
+                            self.ranges.insert(left_i, new_range);
+                            other.ranges.remove(right_i);
+                            left_i += 1;
+                        }
+                    }
                }
-                CharacterSet::Exclude(other_chars) => {
-                    let mut removed = remove_chars(chars, other_chars, false);
-                    add_chars(other_chars, chars);
-                    swap(&mut removed, chars);
-                    CharacterSet::Include(removed)
+                Ordering::Equal => {
+                    // [ L ]
+                    // [  R  ]
+                    if left.end < right.end {
+                        intersection.push(left.start..left.end);
+                        right.start = left.end;
+                        self.ranges.remove(left_i);
+                    }
+                    // [ L ]
+                    // [ R ]
+                    else if left.end == right.end {
+                        intersection.push(left.clone());
+                        self.ranges.remove(left_i);
+                        other.ranges.remove(right_i);
+                    }
+                    // [  L  ]
+                    // [ R ]
+                    else if left.end > right.end {
+                        intersection.push(right.clone());
+                        left.start = right.end;
+                        other.ranges.remove(right_i);
+                    }
                }
-            },
-            CharacterSet::Exclude(chars) => match other {
-                CharacterSet::Include(other_chars) => {
-                    let mut removed = remove_chars(other_chars, chars, false);
-                    add_chars(chars, other_chars);
-                    swap(&mut removed, other_chars);
-                    CharacterSet::Include(removed)
+                Ordering::Greater => {
+                    //     [ L ]
+                    // [ R ]
+                    if left.start >= right.end {
+                        right_i += 1;
+                        continue;
+                    }
+
+                    match left.end.cmp(&right.end) {
+                        //   [ L ]
+                        // [   R   ]
+                        Ordering::Less => {
+                            intersection.push(left.clone());
+                            let new_range = right.start..left.start;
+                            right.start = left.end;
+                            other.ranges.insert(right_i, new_range);
+                            self.ranges.remove(left_i);
+                            right_i += 1;
+                        }
+
+                        //   [ L ]
+                        // [  R  ]
+                        Ordering::Equal => {
+                            intersection.push(left.clone());
+                            right.end = left.start;
+                            self.ranges.remove(left_i);
+                        }
+
+                        //   [   L   ]
+                        // [   R   ]
+                        Ordering::Greater => {
+                            intersection.push(left.start..right.end);
+                            swap(&mut left.start, &mut right.end);
+                            right_i += 1;
+                        }
+                    }
                }
-                CharacterSet::Exclude(other_chars) => {
-                    let mut result_exclusion = chars.clone();
-                    result_exclusion.extend(other_chars.iter().cloned());
-                    result_exclusion.sort_unstable();
-                    result_exclusion.dedup();
-                    remove_chars(chars, other_chars, true);
-                    let mut included_characters = Vec::new();
-                    let mut other_included_characters = Vec::new();
-                    swap(&mut included_characters, other_chars);
-                    swap(&mut other_included_characters, chars);
-                    *self = CharacterSet::Include(included_characters);
-                    *other = CharacterSet::Include(other_included_characters);
-                    CharacterSet::Exclude(result_exclusion)
-                }
-            },
+            }
        }
+        CharacterSet {
+            ranges: intersection,
+        }
+    }
+
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
+        self.ranges.iter().flat_map(|r| r.clone())
+    }
+
+    pub fn chars<'a>(&'a self) -> impl Iterator<Item = char> + 'a {
+        self.iter().filter_map(char::from_u32)
    }

    pub fn is_empty(&self) -> bool {
-        if let CharacterSet::Include(c) = self {
-            c.is_empty()
-        } else {
-            false
-        }
+        self.ranges.is_empty()
    }

-    pub fn ranges<'a>(
-        chars: &'a Vec<char>,
+    pub fn simplify_ignoring<'a>(
+        &'a self,
        ruled_out_characters: &'a HashSet<u32>,
-    ) -> impl Iterator<Item = Range<char>> + 'a {
+    ) -> Vec<Range<char>> {
        let mut prev_range: Option<Range<char>> = None;
-        chars
-            .iter()
-            .map(|c| (*c, false))
+        self.chars()
+            .map(|c| (c, false))
            .chain(Some(('\0', true)))
            .filter_map(move |(c, done)| {
                if done {
@ -212,35 +302,40 @@ impl CharacterSet {
                    None
                }
            })
+            .collect()
    }

-    #[cfg(test)]
    pub fn contains(&self, c: char) -> bool {
-        match self {
-            CharacterSet::Include(chars) => chars.contains(&c),
-            CharacterSet::Exclude(chars) => !chars.contains(&c),
-        }
+        self.ranges.iter().any(|r| r.contains(&(c as u32)))
    }
 }

 impl Ord for CharacterSet {
    fn cmp(&self, other: &CharacterSet) -> Ordering {
-        match self {
-            CharacterSet::Include(chars) => {
-                if let CharacterSet::Include(other_chars) = other {
-                    order_chars(chars, other_chars)
-                } else {
-                    Ordering::Less
-                }
+        let count_cmp = self
+            .ranges
+            .iter()
+            .map(|r| r.len())
+            .sum::<usize>()
+            .cmp(&other.ranges.iter().map(|r| r.len()).sum());
+        if count_cmp != Ordering::Equal {
+            return count_cmp;
+        }
+
+        for (left_range, right_range) in self.ranges.iter().zip(other.ranges.iter()) {
+            let cmp = left_range.len().cmp(&right_range.len());
+            if cmp != Ordering::Equal {
+                return cmp;
            }
-            CharacterSet::Exclude(chars) => {
-                if let CharacterSet::Exclude(other_chars) = other {
-                    order_chars(chars, other_chars)
-                } else {
-                    Ordering::Greater
+
+            for (left, right) in left_range.clone().zip(right_range.clone()) {
+                let cmp = left.cmp(&right);
+                if cmp != Ordering::Equal {
+                    return cmp;
                }
            }
        }
+        return Ordering::Equal;
    }
 }

@ -250,89 +345,22 @@ impl PartialOrd for CharacterSet {
    }
 }

-fn add_chars(left: &mut Vec<char>, right: &Vec<char>) {
-    for c in right {
-        match left.binary_search(c) {
-            Err(i) => left.insert(i, *c),
-            _ => {}
+impl fmt::Debug for CharacterSet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "CharacterSet [")?;
+        let mut set = self.clone();
+        if self.contains(char::MAX) {
+            write!(f, "^ ")?;
+            set = set.negate();
        }
-    }
-}
-
-fn remove_chars(left: &mut Vec<char>, right: &mut Vec<char>, mutate_right: bool) -> Vec<char> {
-    let mut result = Vec::new();
-    right.retain(|right_char| {
-        if let Some(index) = left.iter().position(|left_char| *left_char == *right_char) {
-            left.remove(index);
-            result.push(*right_char);
-            false || !mutate_right
-        } else {
-            true
-        }
-    });
-    result
-}
-
-struct SetComparision {
-    left_only: bool,
-    common: bool,
-    right_only: bool,
-}
-
-fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparision {
-    let mut result = SetComparision {
-        left_only: false,
-        common: false,
-        right_only: false,
-    };
-    let mut left = left.iter().cloned();
-    let mut right = right.iter().cloned();
-    let mut i = left.next();
-    let mut j = right.next();
-    while let (Some(left_char), Some(right_char)) = (i, j) {
-        if left_char < right_char {
-            i = left.next();
-            result.left_only = true;
-        } else if left_char > right_char {
-            j = right.next();
-            result.right_only = true;
-        } else {
-            i = left.next();
-            j = right.next();
-            result.common = true;
-        }
-    }
-
-    match (i, j) {
-        (Some(_), _) => result.left_only = true,
-        (_, Some(_)) => result.right_only = true,
-        _ => {}
-    }
-
-    result
-}
-
-fn order_chars(chars: &Vec<char>, other_chars: &Vec<char>) -> Ordering {
-    if chars.is_empty() {
-        if other_chars.is_empty() {
-            Ordering::Equal
-        } else {
-            Ordering::Less
-        }
-    } else if other_chars.is_empty() {
-        Ordering::Greater
-    } else {
-        let cmp = chars.len().cmp(&other_chars.len());
-        if cmp != Ordering::Equal {
-            return cmp;
-        }
-        for (c, other_c) in chars.iter().zip(other_chars.iter()) {
-            let cmp = c.cmp(other_c);
-            if cmp != Ordering::Equal {
-                return cmp;
+        for (i, c) in set.chars().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
            }
+            write!(f, "{:?}", c)?;
        }
-        Ordering::Equal
+        write!(f, "]")?;
+        Ok(())
    }
 }

@ -624,48 +652,46 @@ mod tests {
            // multiple negated character classes
            (
                vec![
-                    (CharacterSet::Include(vec!['a']), false, 0, 1),
-                    (CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2),
-                    (CharacterSet::Include(vec!['g']), false, 0, 6),
-                    (CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3),
-                    (CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4),
-                    (CharacterSet::Include(vec!['g']), false, 0, 5),
+                    (CharacterSet::from_char('a'), false, 0, 1),
+                    (CharacterSet::from_range('a', 'c').negate(), false, 0, 2),
+                    (CharacterSet::from_char('g'), false, 0, 6),
+                    (CharacterSet::from_range('d', 'f').negate(), false, 0, 3),
+                    (CharacterSet::from_range('g', 'i').negate(), false, 0, 4),
+                    (CharacterSet::from_char('g'), false, 0, 5),
                ],
                vec![
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['a']),
+                        characters: CharacterSet::from_char('a'),
                        precedence: 0,
                        states: vec![1, 3, 4],
                        is_separator: false,
                    },
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['g']),
+                        characters: CharacterSet::from_char('g'),
                        precedence: 0,
                        states: vec![2, 3, 5, 6],
                        is_separator: false,
                    },
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['b', 'c']),
+                        characters: CharacterSet::from_range('b', 'c'),
                        precedence: 0,
                        states: vec![3, 4],
                        is_separator: false,
                    },
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['h', 'i']),
+                        characters: CharacterSet::from_range('h', 'i'),
                        precedence: 0,
                        states: vec![2, 3],
                        is_separator: false,
                    },
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['d', 'e', 'f']),
+                        characters: CharacterSet::from_range('d', 'f'),
                        precedence: 0,
                        states: vec![2, 4],
                        is_separator: false,
                    },
                    NfaTransition {
-                        characters: CharacterSet::Exclude(vec![
-                            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
-                        ]),
+                        characters: CharacterSet::from_range('a', 'i').negate(),
                        precedence: 0,
                        states: vec![2, 3, 4],
                        is_separator: false,
@ -675,21 +701,21 @@ mod tests {
            // disjoint characters with same state
            (
                vec![
-                    (CharacterSet::Include(vec!['a']), false, 0, 1),
-                    (CharacterSet::Include(vec!['b']), false, 0, 2),
-                    (CharacterSet::Include(vec!['c']), false, 0, 1),
-                    (CharacterSet::Include(vec!['d']), false, 0, 1),
-                    (CharacterSet::Include(vec!['e']), false, 0, 2),
+                    (CharacterSet::from_char('a'), false, 0, 1),
+                    (CharacterSet::from_char('b'), false, 0, 2),
+                    (CharacterSet::from_char('c'), false, 0, 1),
+                    (CharacterSet::from_char('d'), false, 0, 1),
+                    (CharacterSet::from_char('e'), false, 0, 2),
                ],
                vec![
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['a', 'c', 'd']),
+                        characters: CharacterSet::empty().add_char('a').add_range('c', 'd'),
                        precedence: 0,
                        states: vec![1],
                        is_separator: false,
                    },
                    NfaTransition {
-                        characters: CharacterSet::Include(vec!['b', 'e']),
+                        characters: CharacterSet::empty().add_char('b').add_char('e'),
                        precedence: 0,
                        states: vec![2],
                        is_separator: false,
@ -698,119 +724,129 @@ mod tests {
            ),
        ];

-        for row in table.iter() {
+        for (i, row) in table.iter().enumerate() {
            assert_eq!(
                NfaCursor::group_transitions(
                    row.0
                        .iter()
                        .map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state))
                ),
-                row.1
+                row.1,
+                "row {}",
+                i
            );
        }
    }

    #[test]
    fn test_character_set_remove_intersection() {
-        // A whitelist and an overlapping whitelist.
-        // Both sets contain 'c', 'd', and 'f'
-        let mut a = CharacterSet::empty().add_range('a', 'f');
-        let mut b = CharacterSet::empty().add_range('c', 'h');
-        assert_eq!(
-            a.remove_intersection(&mut b),
-            CharacterSet::empty().add_range('c', 'f')
-        );
-        assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
-        assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));
+        // One table-driven case: two input sets plus the results expected from
+        // calling `remove_intersection` on them, in either order.
+        struct Row {
+            // Input sets; every check below works on fresh clones of these.
+            left: CharacterSet,
+            right: CharacterSet,
+            // Expected contents of the `left` clone after the call.
+            left_only: CharacterSet,
+            // Expected contents of the `right` clone after the call.
+            right_only: CharacterSet,
+            // Expected return value of `remove_intersection`.
+            intersection: CharacterSet,
+        }

-        let mut a = CharacterSet::empty().add_range('a', 'f');
-        let mut b = CharacterSet::empty().add_range('c', 'h');
-        assert_eq!(
-            b.remove_intersection(&mut a),
-            CharacterSet::empty().add_range('c', 'f')
-        );
-        assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
-        assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));
+        let rows = [
+            // [ L ]
+            //     [ R ]
+            Row {
+                left: CharacterSet::from_range('a', 'f'),
+                right: CharacterSet::from_range('g', 'm'),
+                left_only: CharacterSet::from_range('a', 'f'),
+                right_only: CharacterSet::from_range('g', 'm'),
+                intersection: CharacterSet::empty(),
+            },
+            // [ L ]
+            //   [ R ]
+            Row {
+                left: CharacterSet::from_range('a', 'f'),
+                right: CharacterSet::from_range('c', 'i'),
+                left_only: CharacterSet::from_range('a', 'b'),
+                right_only: CharacterSet::from_range('g', 'i'),
+                intersection: CharacterSet::from_range('c', 'f'),
+            },
+            // [  L  ]
+            //   [ R ]
+            Row {
+                left: CharacterSet::from_range('a', 'f'),
+                right: CharacterSet::from_range('d', 'f'),
+                left_only: CharacterSet::from_range('a', 'c'),
+                right_only: CharacterSet::empty(),
+                intersection: CharacterSet::from_range('d', 'f'),
+            },
+            // [   L   ]
+            //   [ R ]
+            Row {
+                left: CharacterSet::from_range('a', 'm'),
+                right: CharacterSet::from_range('d', 'f'),
+                left_only: CharacterSet::empty()
+                    .add_range('a', 'c')
+                    .add_range('g', 'm'),
+                right_only: CharacterSet::empty(),
+                intersection: CharacterSet::from_range('d', 'f'),
+            },
+            // [ L1 ] [ L2 ]
+            //    [  R  ]
+            Row {
+                left: CharacterSet::empty()
+                    .add_range('a', 'e')
+                    .add_range('h', 'l'),
+                right: CharacterSet::from_range('c', 'i'),
+                left_only: CharacterSet::empty()
+                    .add_range('a', 'b')
+                    .add_range('j', 'l'),
+                right_only: CharacterSet::from_range('f', 'g'),
+                intersection: CharacterSet::empty()
+                    .add_range('c', 'e')
+                    .add_range('h', 'i'),
+            },
+        ];

-        // A whitelist and a larger whitelist.
-        let mut a = CharacterSet::empty().add_char('c');
-        let mut b = CharacterSet::empty().add_range('a', 'e');
-        assert_eq!(
-            a.remove_intersection(&mut b),
-            CharacterSet::empty().add_char('c')
-        );
-        assert_eq!(a, CharacterSet::empty());
-        assert_eq!(
-            b,
-            CharacterSet::empty()
-                .add_range('a', 'b')
-                .add_range('d', 'e')
-        );
+        for (i, row) in rows.iter().enumerate() {
+            let mut left = row.left.clone();
+            let mut right = row.right.clone();
+            assert_eq!(
+                left.remove_intersection(&mut right),
+                row.intersection,
+                "row {}a: {:?} && {:?}",
+                i,
+                row.left,
+                row.right
+            );
+            assert_eq!(
+                left, row.left_only,
+                "row {}a: {:?} - {:?}",
+                i, row.left, row.right
+            );
+            assert_eq!(
+                right, row.right_only,
+                "row {}a: {:?} - {:?}",
+                i, row.right, row.left
+            );

-        let mut a = CharacterSet::empty().add_char('c');
-        let mut b = CharacterSet::empty().add_range('a', 'e');
-        assert_eq!(
-            b.remove_intersection(&mut a),
-            CharacterSet::empty().add_char('c')
-        );
-        assert_eq!(a, CharacterSet::empty());
-        assert_eq!(
-            b,
-            CharacterSet::empty()
-                .add_range('a', 'b')
-                .add_range('d', 'e')
-        );
-
-        // An inclusion and an intersecting exclusion.
-        // Both sets contain 'e', 'f', and 'm'
-        let mut a = CharacterSet::empty()
-            .add_range('c', 'h')
-            .add_range('k', 'm');
-        let mut b = CharacterSet::empty()
-            .add_range('a', 'd')
-            .add_range('g', 'l')
-            .negate();
-        assert_eq!(
-            a.remove_intersection(&mut b),
-            CharacterSet::Include(vec!['e', 'f', 'm'])
-        );
-        assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
-        assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
-
-        let mut a = CharacterSet::empty()
-            .add_range('c', 'h')
-            .add_range('k', 'm');
-        let mut b = CharacterSet::empty()
-            .add_range('a', 'd')
-            .add_range('g', 'l')
-            .negate();
-        assert_eq!(
-            b.remove_intersection(&mut a),
-            CharacterSet::Include(vec!['e', 'f', 'm'])
-        );
-        assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
-        assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
-
-        // An exclusion and an overlapping inclusion.
-        // Both sets exclude 'c', 'd', and 'e'
-        let mut a = CharacterSet::empty().add_range('a', 'e').negate();
-        let mut b = CharacterSet::empty().add_range('c', 'h').negate();
-        assert_eq!(
-            a.remove_intersection(&mut b),
-            CharacterSet::empty().add_range('a', 'h').negate(),
-        );
-        assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
-        assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));
-
-        // An exclusion and a larger exclusion.
-        let mut a = CharacterSet::empty().add_range('b', 'c').negate();
-        let mut b = CharacterSet::empty().add_range('a', 'd').negate();
-        assert_eq!(
-            a.remove_intersection(&mut b),
-            CharacterSet::empty().add_range('a', 'd').negate(),
-        );
-        assert_eq!(a, CharacterSet::empty().add_char('a').add_char('d'));
-        assert_eq!(b, CharacterSet::empty());
+            let mut left = row.left.clone();
+            let mut right = row.right.clone();
+            assert_eq!(
+                right.remove_intersection(&mut left),
+                row.intersection,
+                "row {}b: {:?} && {:?}",
+                i,
+                row.left,
+                row.right
+            );
+            assert_eq!(
+                left, row.left_only,
+                "row {}b: {:?} - {:?}",
+                i, row.left, row.right
+            );
+            assert_eq!(
+                right, row.right_only,
+                "row {}b: {:?} - {:?}",
+                i, row.right, row.left
+            );
+        }
    }

    #[test]
@ -834,29 +870,29 @@ mod tests {
        assert!(!b.does_intersect(&a));

        let (a, b) = (
-            CharacterSet::Include(vec!['b']),
-            CharacterSet::Exclude(vec!['a', 'b', 'c']),
+            CharacterSet::from_char('b'),
+            CharacterSet::from_range('a', 'c'),
+        );
+        assert!(a.does_intersect(&b));
+        assert!(b.does_intersect(&a));
+
+        let (a, b) = (
+            CharacterSet::from_char('b'),
+            CharacterSet::from_range('a', 'c').negate(),
        );
        assert!(!a.does_intersect(&b));
        assert!(!b.does_intersect(&a));

        let (a, b) = (
-            CharacterSet::Include(vec!['b']),
-            CharacterSet::Exclude(vec!['a', 'c']),
+            CharacterSet::from_char('a').negate(),
+            CharacterSet::from_char('a').negate(),
        );
        assert!(a.does_intersect(&b));
        assert!(b.does_intersect(&a));

        let (a, b) = (
-            CharacterSet::Exclude(vec!['a']),
-            CharacterSet::Exclude(vec!['a']),
-        );
-        assert!(a.does_intersect(&b));
-        assert!(b.does_intersect(&a));
-
-        let (a, b) = (
-            CharacterSet::Include(vec!['c']),
-            CharacterSet::Exclude(vec!['a']),
+            CharacterSet::from_char('c'),
+            CharacterSet::from_char('a').negate(),
        );
        assert!(a.does_intersect(&b));
        assert!(b.does_intersect(&a));
@ -898,7 +934,11 @@ mod tests {
                .into_iter()
                .map(|c: &char| *c as u32)
                .collect();
-            let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
+            let mut set = CharacterSet::empty();
+            for c in chars {
+                set = set.add_char(*c);
+            }
+            let ranges = set.simplify_ignoring(&ruled_out_chars);
            assert_eq!(ranges, *expected_ranges);
        }
    }
--- a/cli/src/generate/prepare_grammar/expand_tokens.rs
+++ b/cli/src/generate/prepare_grammar/expand_tokens.rs
@ -12,7 +12,7 @@ use std::i32;

 lazy_static! {
+    // Matches a `{...}` group that is not preceded by a backslash and whose
+    // contents include at least one character that is neither a hex digit
+    // (upper or lower case), a comma, nor `}`.
    static ref CURLY_BRACE_REGEX: Regex =
-        Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-F,}][^}]*)\}"#).unwrap();
+        Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
 }

 const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
@ -198,11 +198,11 @@ impl NfaBuilder {
            Ast::Empty(_) => Ok(false),
            Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
            Ast::Literal(literal) => {
-                self.push_advance(CharacterSet::Include(vec![literal.c]), next_state_id);
+                self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
                Ok(true)
            }
            Ast::Dot(_) => {
-                self.push_advance(CharacterSet::Exclude(vec!['\n']), next_state_id);
+                self.push_advance(CharacterSet::from_char('\n').negate(), next_state_id);
                Ok(true)
            }
            Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")),
@ -344,11 +344,9 @@ impl NfaBuilder {

    fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
        match item {
-            ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
-            ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
-            ClassSetItem::Range(range) => {
-                Ok(CharacterSet::empty().add_range(range.start.c, range.end.c))
-            }
+            ClassSetItem::Empty(_) => Ok(CharacterSet::empty()),
+            ClassSetItem::Literal(literal) => Ok(CharacterSet::from_char(literal.c)),
+            ClassSetItem::Range(range) => Ok(CharacterSet::from_range(range.start.c, range.end.c)),
            ClassSetItem::Union(union) => {
                let mut result = CharacterSet::empty();
                for item in &union.items {
@ -366,7 +364,7 @@ impl NfaBuilder {

    fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
        match item {
-            ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'),
+            ClassPerlKind::Digit => CharacterSet::from_range('0', '9'),
            ClassPerlKind::Space => CharacterSet::empty()
                .add_char(' ')
                .add_char('\t')
@ -653,12 +651,15 @@ mod tests {
                    Rule::pattern(r#"\{[ab]{3}\}"#),
                    // Unicode codepoints
                    Rule::pattern(r#"\u{1000A}"#),
+                    // Unicode codepoints (lowercase)
+                    Rule::pattern(r#"\u{1000b}"#),
                ],
                separators: vec![],
                examples: vec![
                    ("u{1234} ok", Some((0, "u{1234}"))),
                    ("{aba}}", Some((1, "{aba}"))),
                    ("\u{1000A}", Some((2, "\u{1000A}"))),
+                    ("\u{1000b}", Some((3, "\u{1000b}"))),
                ],
            },
        ];
--- a/cli/src/generate/render.rs
+++ b/cli/src/generate/render.rs
@ -1,5 +1,4 @@
 use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
-use super::nfa::CharacterSet;
 use super::rules::{Alias, AliasMap, Symbol, SymbolType};
 use super::tables::{
    AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
@ -659,21 +658,19 @@ impl Generator {
                    .advance_actions
                    .iter()
                    .map(|(chars, action)| {
-                        let (chars, is_included) = match chars {
-                            CharacterSet::Include(c) => (c, true),
-                            CharacterSet::Exclude(c) => (c, false),
-                        };
-                        let mut call_id = None;
-                        let mut ranges =
-                            CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
+                        let is_included = !chars.contains(std::char::MAX);
+                        let mut ranges;
                        if is_included {
-                            ruled_out_chars.extend(chars.iter().map(|c| *c as u32));
+                            ranges = chars.simplify_ignoring(&ruled_out_chars);
+                            ruled_out_chars.extend(chars.iter());
                        } else {
+                            ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars);
                            ranges.insert(0, '\0'..'\0')
                        }

                        // Record any large character sets so that they can be extracted
                        // into helper functions, reducing code duplication.
+                        let mut call_id = None;
                        if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT {
                            let char_set_symbol = self
                                .symbol_for_advance_action(action, &lex_table)
@ -887,11 +884,16 @@ impl Generator {
                    add!(self, " &&{}lookahead != ", line_break);
                    self.add_character(range.end);
                } else {
-                    add!(self, "(lookahead < ");
-                    self.add_character(range.start);
-                    add!(self, " || ");
-                    self.add_character(range.end);
-                    add!(self, " < lookahead)");
+                    if range.start != '\0' {
+                        add!(self, "(lookahead < ");
+                        self.add_character(range.start);
+                        add!(self, " || ");
+                        self.add_character(range.end);
+                        add!(self, " < lookahead)");
+                    } else {
+                        add!(self, "lookahead > ");
+                        self.add_character(range.end);
+                    }
                }
            }
            did_add = true;
--- a/cli/src/generate/templates/index.js
+++ b/cli/src/generate/templates/index.js
@ -1,10 +1,14 @@
 try {
  module.exports = require("./build/Release/tree_sitter_PARSER_NAME_binding");
 } catch (error) {
-  try {
-    module.exports = require("./build/Debug/tree_sitter_PARSER_NAME_binding");
-  } catch (_) {
+  // Only fall back to the Debug build when the Release binding is missing;
+  // any other load error is rethrown unchanged.
+  if (error.code !== 'MODULE_NOT_FOUND')
    throw error
+  else try {
+    module.exports = require("./build/Debug/tree_sitter_PARSER_NAME_binding");
+  } catch (error2) {
+    // If the Debug binding is missing as well, report the original Release
+    // error; otherwise surface the Debug build's own load error.
+    if (error2.code === 'MODULE_NOT_FOUND')
+      throw error
+    throw error2
  }
 }

--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@ -63,6 +63,7 @@ fn run() -> error::Result<()> {
                .arg(Arg::with_name("scope").long("scope").takes_value(true))
                .arg(Arg::with_name("debug").long("debug").short("d"))
                .arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
+                .arg(Arg::with_name("debug-xml").long("xml").short("x"))
                .arg(Arg::with_name("quiet").long("quiet").short("q"))
                .arg(Arg::with_name("stat").long("stat").short("s"))
                .arg(Arg::with_name("time").long("time").short("t"))
@ -119,6 +120,7 @@ fn run() -> error::Result<()> {
                        .short("f")
                        .takes_value(true),
                )
+                .arg(Arg::with_name("update").long("update").short("u"))
                .arg(Arg::with_name("debug").long("debug").short("d"))
                .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
        )
@ -193,6 +195,7 @@ fn run() -> error::Result<()> {
    } else if let Some(matches) = matches.subcommand_matches("test") {
        let debug = matches.is_present("debug");
        let debug_graph = matches.is_present("debug-graph");
+        let update = matches.is_present("update");
        let filter = matches.value_of("filter");
        let languages = loader.languages_at_path(&current_dir)?;
        let language = languages
@ -206,7 +209,7 @@ fn run() -> error::Result<()> {
            test_corpus_dir = current_dir.join("corpus");
        }
        if test_corpus_dir.is_dir() {
-            test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter)?;
+            test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?;
        }

        // Check that all of the queries are valid.
@ -220,6 +223,7 @@ fn run() -> error::Result<()> {
    } else if let Some(matches) = matches.subcommand_matches("parse") {
        let debug = matches.is_present("debug");
        let debug_graph = matches.is_present("debug-graph");
+        let debug_xml = matches.is_present("debug-xml");
        let quiet = matches.is_present("quiet");
        let time = matches.is_present("time");
        let edits = matches
@ -255,6 +259,7 @@ fn run() -> error::Result<()> {
                timeout,
                debug,
                debug_graph,
+                debug_xml,
                Some(&cancellation_flag),
            )?;

--- a/cli/src/parse.rs
+++ b/cli/src/parse.rs
@ -40,6 +40,7 @@ pub fn parse_file_at_path(
    timeout: u64,
    debug: bool,
    debug_graph: bool,
+    debug_xml: bool,
    cancellation_flag: Option<&AtomicUsize>,
 ) -> Result<bool> {
    let mut _log_session = None;
@ -151,6 +152,60 @@ pub fn parse_file_at_path(
            println!("");
        }

+        if debug_xml {
+            // Print the syntax tree as XML: each named node becomes an element
+            // named after its kind, and the source text of every childless
+            // node is written HTML-escaped at that point of the walk.
+            let mut needs_newline = false;
+            let mut indent_level = 0;
+            let mut did_visit_children = false;
+            // Kinds of the named nodes whose elements are currently open.
+            let mut tags: Vec<&str> = Vec::new();
+            loop {
+                let node = cursor.node();
+                let is_named = node.is_named();
+                if did_visit_children {
+                    // Leaving a node: close its element, then move on to the
+                    // next sibling or climb back to the parent.
+                    if is_named {
+                        let tag = tags.pop();
+                        write!(&mut stdout, "</{}>\n", tag.expect("there is a tag"))?;
+                        needs_newline = true;
+                    }
+                    if cursor.goto_next_sibling() {
+                        did_visit_children = false;
+                    } else if cursor.goto_parent() {
+                        did_visit_children = true;
+                        indent_level -= 1;
+                    } else {
+                        break;
+                    }
+                } else {
+                    // Entering a node: open its element, then descend.
+                    if is_named {
+                        // `write_all` instead of `write`: a plain `write` may
+                        // accept fewer bytes than given and silently drop the rest.
+                        if needs_newline {
+                            stdout.write_all(b"\n")?;
+                        }
+                        for _ in 0..indent_level {
+                            stdout.write_all(b"  ")?;
+                        }
+                        write!(&mut stdout, "<{}", node.kind())?;
+                        if let Some(field_name) = cursor.field_name() {
+                            write!(&mut stdout, " type=\"{}\"", field_name)?;
+                        }
+                        write!(&mut stdout, ">")?;
+                        tags.push(node.kind());
+                        needs_newline = true;
+                    }
+                    if cursor.goto_first_child() {
+                        did_visit_children = false;
+                        indent_level += 1;
+                    } else {
+                        did_visit_children = true;
+                        let start = node.start_byte();
+                        let end = node.end_byte();
+                        // Lossy decoding keeps the dump going when the source
+                        // is not valid UTF-8 instead of panicking mid-output.
+                        let value = String::from_utf8_lossy(&source_code[start..end]);
+                        write!(&mut stdout, "{}", html_escape::encode_text(&*value))?;
+                    }
+                }
+            }
+            // Rewind the cursor so the code that follows starts from the root.
+            cursor.reset(tree.root_node());
+            println!();
+        }
+
        let mut first_error = None;
        loop {
            let node = cursor.node();
--- a/cli/src/test.rs
+++ b/cli/src/test.rs
@ -6,9 +6,10 @@ use lazy_static::lazy_static;
 use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
 use regex::Regex;
 use std::char;
+use std::fmt::Write as FmtWrite;
 use std::fs;
 use std::io::{self, Write};
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::str;
 use tree_sitter::{Language, LogType, Parser, Query};

@ -30,6 +31,7 @@ pub enum TestEntry {
    Group {
        name: String,
        children: Vec<TestEntry>,
+        file_path: Option<PathBuf>,
    },
    Example {
        name: String,
@ -44,6 +46,7 @@ impl Default for TestEntry {
        TestEntry::Group {
            name: String::new(),
            children: Vec::new(),
+            file_path: None,
        }
    }
 }
@ -54,6 +57,7 @@ pub fn run_tests_at_path(
    debug: bool,
    debug_graph: bool,
    filter: Option<&str>,
+    update: bool,
 ) -> Result<()> {
    let test_entry = parse_tests(path)?;
    let mut _log_session = None;
@ -72,27 +76,45 @@ pub fn run_tests_at_path(
    }

    let mut failures = Vec::new();
-    if let TestEntry::Group { children, .. } = test_entry {
-        for child in children {
-            run_tests(&mut parser, child, filter, 0, &mut failures)?;
-        }
-    }
+    let mut corrected_entries = Vec::new();
+    run_tests(
+        &mut parser,
+        test_entry,
+        filter,
+        0,
+        &mut failures,
+        update,
+        &mut corrected_entries,
+    )?;

    if failures.len() > 0 {
        println!("");

-        if failures.len() == 1 {
-            println!("1 failure:")
-        } else {
-            println!("{} failures:", failures.len())
-        }
+        if update {
+            if failures.len() == 1 {
+                println!("1 update:\n")
+            } else {
+                println!("{} updates:\n", failures.len())
+            }

-        print_diff_key();
-        for (i, (name, actual, expected)) in failures.iter().enumerate() {
-            println!("\n  {}. {}:", i + 1, name);
-            print_diff(actual, expected);
+            for (i, (name, ..)) in failures.iter().enumerate() {
+                println!("  {}. {}", i + 1, name);
+            }
+            Ok(())
+        } else {
+            if failures.len() == 1 {
+                println!("1 failure:")
+            } else {
+                println!("{} failures:", failures.len())
+            }
+
+            print_diff_key();
+            for (i, (name, actual, expected)) in failures.iter().enumerate() {
+                println!("\n  {}. {}:", i + 1, name);
+                print_diff(actual, expected);
+            }
+            Error::err(String::new())
        }
-        Error::err(String::new())
    } else {
        Ok(())
    }
@ -149,6 +171,8 @@ fn run_tests(
    filter: Option<&str>,
    mut indent_level: i32,
    failures: &mut Vec<(String, String, String)>,
+    update: bool,
+    corrected_entries: &mut Vec<(String, String, String)>,
 ) -> Result<()> {
    match test_entry {
        TestEntry::Example {
@ -159,6 +183,11 @@ fn run_tests(
        } => {
            if let Some(filter) = filter {
                if !name.contains(filter) {
+                    if update {
+                        let input = String::from_utf8(input).unwrap();
+                        let output = format_sexp(&output);
+                        corrected_entries.push((name, input, output));
+                    }
                    return Ok(());
                }
            }
@ -172,25 +201,138 @@ fn run_tests(
            }
            if actual == output {
                println!("✓ {}", Colour::Green.paint(&name));
+                if update {
+                    let input = String::from_utf8(input).unwrap();
+                    let output = format_sexp(&output);
+                    corrected_entries.push((name, input, output));
+                }
            } else {
-                println!("✗ {}", Colour::Red.paint(&name));
+                if update {
+                    let input = String::from_utf8(input).unwrap();
+                    let output = format_sexp(&actual);
+                    corrected_entries.push((name.clone(), input, output));
+                    println!("✓ {}", Colour::Blue.paint(&name));
+                } else {
+                    println!("✗ {}", Colour::Red.paint(&name));
+                }
                failures.push((name, actual, output));
            }
        }
-        TestEntry::Group { name, children } => {
-            for _ in 0..indent_level {
-                print!("  ");
+        TestEntry::Group {
+            name,
+            children,
+            file_path,
+        } => {
+            if indent_level > 0 {
+                for _ in 0..indent_level {
+                    print!("  ");
+                }
+                println!("{}:", name);
            }
-            println!("{}:", name);
+
+            let failure_count = failures.len();
+
            indent_level += 1;
            for child in children {
-                run_tests(parser, child, filter, indent_level, failures)?;
+                run_tests(
+                    parser,
+                    child,
+                    filter,
+                    indent_level,
+                    failures,
+                    update,
+                    corrected_entries,
+                )?;
+            }
+
+            if let Some(file_path) = file_path {
+                if update && failures.len() - failure_count > 0 {
+                    write_tests(&file_path, corrected_entries)?;
+                }
+                corrected_entries.clear();
            }
        }
    }
    Ok(())
 }

+/// Pretty-prints a single-line S-expression as an indented, multi-line one.
+///
+/// Every `(node` token starts a new line indented by its nesting depth, unless
+/// it directly follows a `field:` token, in which case it stays on the field's
+/// line. `(MISSING` and `(UNEXPECTED` keep the token that follows them on the
+/// same line.
+fn format_sexp(sexp: &str) -> String {
+    let mut formatted = String::new();
+
+    let mut indent_level = 0;
+    let mut has_field = false;
+    // Splitting on both ' ' and ')' turns every ')' that is followed by a
+    // space, another ')' or the end of input into an empty token.
+    let mut s_iter = sexp.split(|c| c == ' ' || c == ')');
+    while let Some(s) = s_iter.next() {
+        if s.is_empty() {
+            // ")"
+            indent_level -= 1;
+            formatted.push(')');
+        } else if s.starts_with('(') {
+            if has_field {
+                // The node is the value of the preceding "field:", which
+                // already emitted the line break and bumped the indent.
+                has_field = false;
+            } else {
+                if indent_level > 0 {
+                    formatted.push('\n');
+                    for _ in 0..indent_level {
+                        formatted.push_str("  ");
+                    }
+                }
+                indent_level += 1;
+            }
+
+            // "(node_name"
+            formatted.push_str(s);
+
+            // Compare the whole token: looking only at the first letter would
+            // also catch ordinary node kinds that start with 'M' or 'U'.
+            if s == "(MISSING" || s == "(UNEXPECTED" {
+                // "(MISSING node_name" or "(UNEXPECTED 'x'"
+                if let Some(arg) = s_iter.next() {
+                    write!(formatted, " {}", arg).unwrap();
+                }
+            }
+        } else if s.ends_with(':') {
+            // "field:"
+            formatted.push('\n');
+            for _ in 0..indent_level {
+                formatted.push_str("  ");
+            }
+            write!(formatted, "{} ", s).unwrap();
+            has_field = true;
+            indent_level += 1;
+        }
+    }
+
+    formatted
+}
+
+/// Overwrites the file at `file_path` with `corrected_entries`, serialized by
+/// `write_tests_to_buffer`.
+fn write_tests(file_path: &Path, corrected_entries: &[(String, String, String)]) -> Result<()> {
+    // Batch the many small writes, and flush explicitly so that a failed
+    // write is reported instead of being dropped together with the buffer.
+    let mut buffer = io::BufWriter::new(fs::File::create(file_path)?);
+    write_tests_to_buffer(&mut buffer, corrected_entries)?;
+    buffer.flush()?;
+    Ok(())
+}
+
+/// Writes each `(name, input, output)` entry to `buffer` as a test case:
+/// a name framed by two 80-column `=` rules, the input, an 80-column `-`
+/// rule, a blank line and the trimmed output. Entries are separated by one
+/// blank line.
+fn write_tests_to_buffer(
+    buffer: &mut impl Write,
+    corrected_entries: &[(String, String, String)],
+) -> Result<()> {
+    // The rules are identical for every entry, so build them once.
+    let header_rule = "=".repeat(80);
+    let divider_rule = "-".repeat(80);
+    for (i, (name, input, output)) in corrected_entries.iter().enumerate() {
+        if i > 0 {
+            write!(buffer, "\n")?;
+        }
+        write!(
+            buffer,
+            "{}\n{}\n{}\n{}\n{}\n\n{}\n",
+            header_rule,
+            name,
+            header_rule,
+            input,
+            divider_rule,
+            output.trim()
+        )?;
+    }
+    Ok(())
+}
+
 pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
    let name = path
        .file_stem()
@ -206,10 +348,14 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
                children.push(parse_tests(&entry.path())?);
            }
        }
-        Ok(TestEntry::Group { name, children })
+        Ok(TestEntry::Group {
+            name,
+            children,
+            file_path: None,
+        })
    } else {
        let content = fs::read_to_string(path)?;
-        Ok(parse_test_content(name, content))
+        Ok(parse_test_content(name, content, Some(path.to_path_buf())))
    }
 }

@ -217,7 +363,7 @@ pub fn strip_sexp_fields(sexp: String) -> String {
    SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string()
 }

-fn parse_test_content(name: String, content: String) -> TestEntry {
+fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) -> TestEntry {
    let mut children = Vec::new();
    let bytes = content.as_bytes();
    let mut prev_name = String::new();
@ -268,7 +414,11 @@ fn parse_test_content(name: String, content: String) -> TestEntry {
            .to_string();
        prev_header_end = header_end;
    }
-    TestEntry::Group { name, children }
+    TestEntry::Group {
+        name,
+        children,
+        file_path,
+    }
 }

 #[cfg(test)]
@ -300,6 +450,7 @@ d
        "#
            .trim()
            .to_string(),
+            None,
        );

        assert_eq!(
@ -319,7 +470,8 @@ d
                        output: "(d)".to_string(),
                        has_fields: false,
                    },
-                ]
+                ],
+                file_path: None,
            }
        );
    }
@ -352,6 +504,7 @@ abc
        "#
            .trim()
            .to_string(),
+            None,
        );

        assert_eq!(
@ -371,8 +524,67 @@ abc
                        output: "(c (d))".to_string(),
                        has_fields: false,
                    },
-                ]
+                ],
+                file_path: None,
            }
        );
    }
+
+    #[test]
+    fn test_format_sexp() {
+        assert_eq!(
+            format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()),
+            r#"
+(a
+  b: (c)
+  (d)
+  e: (f
+    (g
+      (h
+        (MISSING i)))))
+"#
+            .trim()
+            .to_string()
+        );
+    }
+
+    #[test]
+    fn test_write_tests_to_buffer() {
+        let mut buffer = Vec::new();
+        let corrected_entries = vec![
+            (
+                "title 1".to_string(),
+                "input 1".to_string(),
+                "output 1".to_string(),
+            ),
+            (
+                "title 2".to_string(),
+                "input 2".to_string(),
+                "output 2".to_string(),
+            ),
+        ];
+        write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
+        assert_eq!(
+            String::from_utf8(buffer).unwrap(),
+            r#"
+================================================================================
+title 1
+================================================================================
+input 1
+--------------------------------------------------------------------------------
+
+output 1
+
+================================================================================
+title 2
+================================================================================
+input 2
+--------------------------------------------------------------------------------
+
+output 2
+"#
+            .trim_start()
+            .to_string()
+        );
+    }
 }
--- a/cli/src/tests/corpus_test.rs
+++ b/cli/src/tests/corpus_test.rs
@ -390,7 +390,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
                }
                result.push((name, input, output, has_fields));
            }
-            TestEntry::Group { mut name, children } => {
+            TestEntry::Group { mut name, children, .. } => {
                if !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
--- a/cli/src/tests/tags_test.rs
+++ b/cli/src/tests/tags_test.rs
@ -70,7 +70,7 @@ const RUBY_TAG_QUERY: &'static str = r#"
 (method
    name: (_) @name) @definition.method

-(method_call
+(call
    method: (identifier) @name) @reference.call

 (setter (identifier) @ignore)
@ -317,19 +317,17 @@ fn test_tags_with_parse_error() {
    assert!(failed, "syntax error should have been detected");

    assert_eq!(
-        newtags.iter()
+        newtags
+            .iter()
            .map(|t| (
                substr(source, &t.name_range),
                tags_config.syntax_type_name(t.syntax_type_id)
            ))
            .collect::<Vec<_>>(),
-        &[
-            ("Fine", "class"),
-        ]
+        &[("Fine", "class"),]
    );
 }

-
 #[test]
 fn test_tags_via_c_api() {
    allocations::record(|| {
--- a/docs/index.md
+++ b/docs/index.md
@ -31,7 +31,7 @@ Parsers for these languages are fairly complete:
 * [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
 * [C++](https://github.com/tree-sitter/tree-sitter-cpp)
 * [CSS](https://github.com/tree-sitter/tree-sitter-css)
-* [Elm](https://github.com/razzeee/tree-sitter-elm)
+* [Elm](https://github.com/elm-tooling/tree-sitter-elm)
 * [Eno](https://github.com/eno-lang/tree-sitter-eno)
 * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
 - [Fennel](https://github.com/travonted/tree-sitter-fennel)
@ -46,6 +46,7 @@ Parsers for these languages are fairly complete:
 * [Python](https://github.com/tree-sitter/tree-sitter-python)
 * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
 * [Rust](https://github.com/tree-sitter/tree-sitter-rust)
+* [R](https://github.com/r-lib/tree-sitter-r)
 * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl)
 * [TOML](https://github.com/ikatyang/tree-sitter-toml)
 * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@ -24,7 +24,7 @@ include = [
 regex = "1"

 [build-dependencies]
-cc = "1.0"
+cc = "^1.0.58"

 [lib]
 path = "binding_rust/lib.rs"
--- a/lib/binding_rust/lib.rs
+++ b/lib/binding_rust/lib.rs
@ -1147,6 +1147,12 @@ impl<'a> TreeCursor<'a> {
    }
 }

+impl<'a> Clone for TreeCursor<'a> {
+    fn clone(&self) -> Self {
+        TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData)
+    }
+}
+
 impl<'a> Drop for TreeCursor<'a> {
    fn drop(&mut self) {
        unsafe { ffi::ts_tree_cursor_delete(&mut self.0) }
--- a/lib/binding_web/README.md
+++ b/lib/binding_web/README.md
@ -7,7 +7,7 @@ WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-si

 ### Setup

-You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/tag/0.14.7) and load them using a standalone script:
+You can download the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script:

 ```html
 <script src="/the/path/to/tree-sitter.js"/>
--- a/lib/binding_web/binding.c
+++ b/lib/binding_web/binding.c
@ -184,6 +184,20 @@ TSTree *ts_parser_parse_wasm(
  return ts_parser_parse(self, old_tree, input);
 }

+/**********************/
+/* Section - Language */
+/**********************/
+
+int ts_language_type_is_named_wasm(const TSLanguage *self, TSSymbol typeId) {
+  const TSSymbolType symbolType = ts_language_symbol_type(self, typeId);
+  return symbolType == TSSymbolTypeRegular;
+}
+
+int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) {
+  const TSSymbolType symbolType = ts_language_symbol_type(self, typeId);
+  return symbolType <= TSSymbolTypeAnonymous;
+}
+
 /******************/
 /* Section - Tree */
 /******************/
--- a/lib/binding_web/binding.js
+++ b/lib/binding_web/binding.js
@ -646,6 +646,32 @@ class Language {
    return this.fields[fieldId] || null;
  }

+  idForNodeType(type, named) {
+    const typeLength = lengthBytesUTF8(type);
+    const typeAddress = C._malloc(typeLength + 1);
+    stringToUTF8(type, typeAddress, typeLength + 1);
+    const result = C._ts_language_symbol_for_name(this[0], typeAddress, typeLength, named);
+    C._free(typeAddress);
+    return result || null;
+  }
+
+  get nodeTypeCount() {
+    return C._ts_language_symbol_count(this[0]);
+  }
+
+  nodeTypeForId(typeId) {
+    const name = C._ts_language_symbol_name(this[0], typeId);
+    return name ? UTF8ToString(name) : null;
+  }
+
+  nodeTypeIsNamed(typeId) {
+    return C._ts_language_type_is_named_wasm(this[0], typeId) ? true : false;
+  }
+
+  nodeTypeIsVisible(typeId) {
+    return C._ts_language_type_is_visible_wasm(this[0], typeId) ? true : false;
+  }
+
  query(source) {
    const sourceLength = lengthBytesUTF8(source);
    const sourceAddress = C._malloc(sourceLength + 1);
@ -856,30 +882,41 @@ class Language {
    );
  }

-  static load(url) {
+  static load(input) {
    let bytes;
-    if (
-      typeof process !== 'undefined' &&
-      process.versions &&
-      process.versions.node
-    ) {
-      const fs = require('fs');
-      bytes = Promise.resolve(fs.readFileSync(url));
+    if (input instanceof Uint8Array) {
+      bytes = Promise.resolve(input);
    } else {
-      bytes = fetch(url)
-        .then(response => response.arrayBuffer()
-          .then(buffer => {
-            if (response.ok) {
-              return new Uint8Array(buffer);
-            } else {
-              const body = new TextDecoder('utf-8').decode(buffer);
-              throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`)
-            }
-          }));
+      const url = input;
+      if (
+        typeof process !== 'undefined' &&
+        process.versions &&
+        process.versions.node
+      ) {
+        const fs = require('fs');
+        bytes = Promise.resolve(fs.readFileSync(url));
+      } else {
+        bytes = fetch(url)
+          .then(response => response.arrayBuffer()
+            .then(buffer => {
+              if (response.ok) {
+                return new Uint8Array(buffer);
+              } else {
+                const body = new TextDecoder('utf-8').decode(buffer);
+                throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`)
+              }
+            }));
+      }
    }

+    // emscripten-core/emscripten#12969
+    const loadModule =
+      typeof loadSideModule === 'function'
+      ? loadSideModule
+      : loadWebAssemblyModule;
+
    return bytes
-      .then(bytes => loadSideModule(bytes, {loadAsync: true}))
+      .then(bytes => loadModule(bytes, {loadAsync: true}))
      .then(mod => {
        const symbolNames = Object.keys(mod)
        const functionName = symbolNames.find(key =>
@ -1139,3 +1176,4 @@ function marshalEdit(edit) {
 }

 Parser.Language = Language;
+Parser.Parser = Parser;
--- a/lib/binding_web/exports.json
+++ b/lib/binding_web/exports.json
@ -31,7 +31,10 @@
  "_ts_init",
  "_ts_language_field_count",
  "_ts_language_field_name_for_id",
+  "_ts_language_type_is_named_wasm",
+  "_ts_language_type_is_visible_wasm",
  "_ts_language_symbol_count",
+  "_ts_language_symbol_for_name",
  "_ts_language_symbol_name",
  "_ts_language_symbol_type",
  "_ts_language_version",
@ -79,6 +82,7 @@
  "_ts_query_predicates_for_pattern",
  "_ts_query_string_count",
  "_ts_query_string_value_for_id",
+  "_ts_tree_copy",
  "_ts_tree_cursor_current_field_id_wasm",
  "_ts_tree_cursor_current_node_id_wasm",
  "_ts_tree_cursor_current_node_is_missing_wasm",
--- a/lib/binding_web/package.json
+++ b/lib/binding_web/package.json
@ -1,6 +1,6 @@
 {
  "name": "web-tree-sitter",
-  "version": "0.17.1",
+  "version": "0.18.0",
  "description": "Tree-sitter bindings for the web",
  "main": "tree-sitter.js",
  "types": "tree-sitter-web.d.ts",
--- a/lib/binding_web/test/language-test.js
+++ b/lib/binding_web/test/language-test.js
@ -0,0 +1,44 @@
+const { assert } = require("chai");
+let JavaScript;
+
+describe("Language", () => {
+  before(async () => ({ JavaScript } = await require("./helper")));
+
+  describe(".fieldIdForName, .fieldNameForId", () => {
+    it("converts between the string and integer representations of fields", () => {
+      const nameId = JavaScript.fieldIdForName("name");
+      const bodyId = JavaScript.fieldIdForName("body");
+
+      assert.isBelow(nameId, JavaScript.fieldCount);
+      assert.isBelow(bodyId, JavaScript.fieldCount);
+      assert.equal("name", JavaScript.fieldNameForId(nameId));
+      assert.equal("body", JavaScript.fieldNameForId(bodyId));
+    });
+
+    it("handles invalid inputs", () => {
+      assert.equal(null, JavaScript.fieldIdForName("namezzz"));
+      assert.equal(null, JavaScript.fieldNameForId(-1));
+      assert.equal(null, JavaScript.fieldNameForId(10000));
+    });
+  });
+
+  describe(".idForNodeType, .nodeTypeForId, .nodeTypeIsNamed", () => {
+    it("converts between the string and integer representations of a node type", () => {
+      const exportStatementId = JavaScript.idForNodeType("export_statement", true);
+      const starId = JavaScript.idForNodeType("*", false);
+
+      assert.isBelow(exportStatementId, JavaScript.nodeTypeCount);
+      assert.isBelow(starId, JavaScript.nodeTypeCount);
+      assert.equal(true, JavaScript.nodeTypeIsNamed(exportStatementId))
+      assert.equal("export_statement", JavaScript.nodeTypeForId(exportStatementId))
+      assert.equal(false, JavaScript.nodeTypeIsNamed(starId))
+      assert.equal("*", JavaScript.nodeTypeForId(starId))
+    });
+
+    it("handles invalid inputs", () => {
+      assert.equal(null, JavaScript.nodeTypeForId(-1));
+      assert.equal(null, JavaScript.nodeTypeForId(10000));
+      assert.equal(null, JavaScript.idForNodeType("export_statement", false));
+    });
+  });
+});
--- a/lib/binding_web/test/tree-test.js
+++ b/lib/binding_web/test/tree-test.js
@ -323,6 +323,31 @@ describe("Tree", () => {
      assert(!cursor.gotoParent());
    })
  });
+
+  describe(".copy", () => {
+    it("creates another tree that remains stable if the original tree is edited", () => {
+      input = 'abc + cde';
+      tree = parser.parse(input);
+      assert.equal(
+        tree.rootNode.toString(),
+        "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
+      );
+
+      const tree2 = tree.copy();
+      ([input, edit] = spliceInput(input, 3, 0, '123'));
+      assert.equal(input, 'abc123 + cde');
+      tree.edit(edit);
+
+      const leftNode = tree.rootNode.firstChild.firstChild.firstChild;
+      const leftNode2 = tree2.rootNode.firstChild.firstChild.firstChild;
+      const rightNode = tree.rootNode.firstChild.firstChild.lastChild;
+      const rightNode2 = tree2.rootNode.firstChild.firstChild.lastChild;
+      assert.equal(leftNode.endIndex, 6)
+      assert.equal(leftNode2.endIndex, 3)
+      assert.equal(rightNode.startIndex, 9)
+      assert.equal(rightNode2.startIndex, 6)
+    });
+  });
 });

 function spliceInput(input, startIndex, lengthRemoved, newText) {
--- a/lib/binding_web/tree-sitter-web.d.ts
+++ b/lib/binding_web/tree-sitter-web.d.ts
@ -127,13 +127,18 @@ declare module 'web-tree-sitter' {
    }

    class Language {
-      static load(path: string): Promise<Language>;
+      static load(input: string | Uint8Array): Promise<Language>;

      readonly version: number;
      readonly fieldCount: number;
+      readonly nodeTypeCount: number;

      fieldNameForId(fieldId: number): string | null;
      fieldIdForName(fieldName: string): number | null;
+      idForNodeType(type: string, named: boolean): number | null; // null when the lookup yields no id
+      nodeTypeForId(typeId: number): string | null;
+      nodeTypeIsNamed(typeId: number): boolean;
+      nodeTypeIsVisible(typeId: number): boolean;
      query(source: string): Query;
    }

--- a/lib/src/tree_cursor.c
+++ b/lib/src/tree_cursor.c
@ -448,6 +448,7 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
  TSTreeCursor res = {NULL, NULL, {0, 0}};
  TreeCursor *copy = (TreeCursor *)&res;
  copy->tree = cursor->tree;
+  array_init(&copy->stack);
  array_push_all(&copy->stack, &cursor->stack);
  return res;
 }
--- a/test/fixtures/error_corpus/ruby_errors.txt
+++ b/test/fixtures/error_corpus/ruby_errors.txt
@ -10,7 +10,7 @@ c
 ---

 (program
-  (method_call
+  (call
    method: (identifier)
    (ERROR (heredoc_beginning))
    arguments: (argument_list