From fa199e3a1a1f300e6acabe3546e92ba180167f65 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:02 -0700 Subject: [PATCH 01/71] Allow most tags to be arbitrarily named, remove hardcoded kinds --- tags/src/lib.rs | 104 ++++++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 69 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8d1853bb..296ac9ba 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -4,7 +4,8 @@ use memchr::{memchr, memrchr}; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{fmt, mem, str}; +use std::{mem, str}; +use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; @@ -18,12 +19,8 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, - call_capture_index: Option, - class_capture_index: Option, + capture_map: HashMap, doc_capture_index: Option, - function_capture_index: Option, - method_capture_index: Option, - module_capture_index: Option, name_capture_index: Option, local_scope_capture_index: Option, local_definition_capture_index: Option, @@ -38,21 +35,13 @@ pub struct TagsContext { #[derive(Debug, Clone)] pub struct Tag { - pub kind: TagKind, pub range: Range, pub name_range: Range, pub line_range: Range, pub span: Range, pub docs: Option, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum TagKind { - Function, - Method, - Class, - Module, - Call, + pub is_definition: bool, + pub kind: String, } #[derive(Debug, PartialEq)] @@ -111,29 +100,23 @@ impl TagsConfiguration { } } - let mut call_capture_index = None; - let mut class_capture_index = None; + let mut capture_map: HashMap = HashMap::new(); let mut doc_capture_index = None; - let mut function_capture_index = None; - let mut method_capture_index = None; - let mut module_capture_index = None; let mut name_capture_index = 
None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { - let index = match name.as_str() { - "call" => &mut call_capture_index, - "class" => &mut class_capture_index, - "doc" => &mut doc_capture_index, - "function" => &mut function_capture_index, - "method" => &mut method_capture_index, - "module" => &mut module_capture_index, - "name" => &mut name_capture_index, - "local.scope" => &mut local_scope_capture_index, - "local.definition" => &mut local_definition_capture_index, - _ => continue, - }; - *index = Some(i as u32); + match name.as_str() { + "" => continue, + "name" => name_capture_index = Some(i as u32), + "doc" => doc_capture_index = Some(i as u32), + "local.scope" => local_scope_capture_index = Some(i as u32), + "local.definition" => local_definition_capture_index = Some(i as u32), + _ => { + capture_map.insert(i as u32, name.to_string()); + continue; + } + } } let pattern_info = (0..query.pattern_count()) @@ -180,12 +163,8 @@ impl TagsConfiguration { Ok(TagsConfiguration { language, query, - function_capture_index, - class_capture_index, - method_capture_index, - module_capture_index, + capture_map, doc_capture_index, - call_capture_index, name_capture_index, tags_pattern_index, local_scope_capture_index, @@ -303,7 +282,8 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut kind = TagKind::Call; + let mut kind = "unknown"; + let mut is_definition = false; let mut docs_adjacent_node = None; for capture in mat.captures { @@ -317,21 +297,18 @@ where name_range = Some(capture.node.byte_range()); } else if index == self.config.doc_capture_index { doc_nodes.push(capture.node); - } else if index == self.config.call_capture_index { + } + + if let Some(name) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - kind = TagKind::Call; - } else if index == self.config.class_capture_index { - 
tag_node = Some(capture.node); - kind = TagKind::Class; - } else if index == self.config.function_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Function; - } else if index == self.config.method_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Method; - } else if index == self.config.module_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Module; + kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + name + } } } @@ -414,10 +391,11 @@ where *tag = Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), span: tag_node.start_position()..tag_node.end_position(), - kind, range, name_range, docs, + kind: kind.to_string(), + is_definition, }; } } @@ -427,10 +405,11 @@ where Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), span: tag_node.start_position()..tag_node.end_position(), - kind, range, name_range, docs, + kind: kind.to_string(), + is_definition, }, mat.pattern_index, ), @@ -448,19 +427,6 @@ where } } -impl fmt::Display for TagKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - TagKind::Call => "Call", - TagKind::Module => "Module", - TagKind::Class => "Class", - TagKind::Method => "Method", - TagKind::Function => "Function", - } - .fmt(f) - } -} - impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 8d7459ed578b8f66bde36624c3f91e40d54d79a2 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:13 -0700 Subject: [PATCH 02/71] Bring c_lib inline --- tags/src/c_lib.rs | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 0c367977..c8ca8ed5 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, TagKind, TagsConfiguration, 
TagsContext}; +use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,16 +19,6 @@ pub enum TSTagsError { Unknown, } -#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TSTagKind { - Function, - Method, - Class, - Module, - Call, -} - #[repr(C)] pub struct TSPoint { row: u32, @@ -37,7 +27,6 @@ pub struct TSPoint { #[repr(C)] pub struct TSTag { - pub kind: TSTagKind, pub start_byte: u32, pub end_byte: u32, pub name_start_byte: u32, @@ -48,6 +37,8 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, + pub kind: String, + pub is_definition: bool, } pub struct TSTagger { @@ -153,13 +144,6 @@ pub extern "C" fn ts_tagger_tag( buffer.docs.extend_from_slice(docs.as_bytes()); } buffer.tags.push(TSTag { - kind: match tag.kind { - TagKind::Function => TSTagKind::Function, - TagKind::Method => TSTagKind::Method, - TagKind::Class => TSTagKind::Class, - TagKind::Module => TSTagKind::Module, - TagKind::Call => TSTagKind::Call, - }, start_byte: tag.range.start as u32, end_byte: tag.range.end as u32, name_start_byte: tag.name_range.start as u32, @@ -176,6 +160,8 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, + kind: tag.kind, + is_definition: tag.is_definition, }); } From 9bf4939b9a1093f6c42d0bdcf268fef8a4e04d8f Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:22 -0700 Subject: [PATCH 03/71] Show if tag is a def/ref in the cli --- cli/src/tags.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index d6704ec5..6308d396 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -42,9 +42,10 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> let tag = tag?; write!( &mut stdout, - " {:<8} {:<40}\t{:>9}-{:<9}", + " {:<8} {:<40}\t [{}] {:>9}-{:<9}", tag.kind, 
str::from_utf8(&source[tag.name_range]).unwrap_or(""), + if tag.is_definition { "definition" } else { "reference" }, tag.span.start, tag.span.end, )?; From d802b3779145d833dc16e3e075f8e34dd684504a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 17:09:34 -0700 Subject: [PATCH 04/71] Bring back a SyntaxType enum --- cli/src/tags.rs | 2 +- tags/src/c_lib.rs | 28 ++++++++++++-- tags/src/lib.rs | 98 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 105 insertions(+), 23 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 6308d396..06f4f4fa 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -43,7 +43,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> write!( &mut stdout, " {:<8} {:<40}\t [{}] {:>9}-{:<9}", - tag.kind, + tag.syntax_type, str::from_utf8(&source[tag.name_range]).unwrap_or(""), if tag.is_definition { "definition" } else { "reference" }, tag.span.start, diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index c8ca8ed5..72c708d0 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, TagsConfiguration, TagsContext}; +use super::{Error, SyntaxType, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,6 +19,19 @@ pub enum TSTagsError { Unknown, } +#[repr(C)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TSSyntaxType { + Function, + Method, + Class, + Module, + Call, + Type, + Interface, + Implementation, +} + #[repr(C)] pub struct TSPoint { row: u32, @@ -37,7 +50,7 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, - pub kind: String, + pub syntax_type: TSSyntaxType, pub is_definition: bool, } @@ -160,7 +173,16 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, - kind: tag.kind, + syntax_type: match tag.syntax_type { + SyntaxType::Function => 
TSSyntaxType::Function, + SyntaxType::Method => TSSyntaxType::Method, + SyntaxType::Class => TSSyntaxType::Class, + SyntaxType::Module => TSSyntaxType::Module, + SyntaxType::Call => TSSyntaxType::Call, + SyntaxType::Type => TSSyntaxType::Type, + SyntaxType::Interface => TSSyntaxType::Interface, + SyntaxType::Implementation => TSSyntaxType::Implementation, + }, is_definition: tag.is_definition, }); } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 296ac9ba..e6179b8b 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -4,7 +4,7 @@ use memchr::{memchr, memrchr}; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{mem, str}; +use std::{fmt, mem, str}; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -19,7 +19,7 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, - capture_map: HashMap, + capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, local_scope_capture_index: Option, @@ -28,6 +28,27 @@ pub struct TagsConfiguration { pattern_info: Vec, } + +#[derive(Debug)] +pub struct NamedCapture { + pub syntax_type: SyntaxType, + pub is_definition: bool, +} + +// Should stay in sync with list of valid syntax types in semantic. 
+// See: https://github.com/github/semantic/blob/621696f5bc523a651f1cf9fc2ac58c557ea02d07/proto/semantic.proto#L165-L174 +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum SyntaxType { + Function, + Method, + Class, + Module, + Call, + Type, + Interface, + Implementation, +} + pub struct TagsContext { parser: Parser, cursor: QueryCursor, @@ -41,7 +62,7 @@ pub struct Tag { pub span: Range, pub docs: Option, pub is_definition: bool, - pub kind: String, + pub syntax_type: SyntaxType, } #[derive(Debug, PartialEq)] @@ -100,7 +121,7 @@ impl TagsConfiguration { } } - let mut capture_map: HashMap = HashMap::new(); + let mut capture_map: HashMap = HashMap::new(); let mut doc_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; @@ -112,9 +133,8 @@ impl TagsConfiguration { "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - _ => { - capture_map.insert(i as u32, name.to_string()); - continue; + _ => if let Some(nc) = NamedCapture::new(name) { + capture_map.insert(i as u32, nc); } } } @@ -282,7 +302,7 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut kind = "unknown"; + let mut syntax_type = SyntaxType::Function; let mut is_definition = false; let mut docs_adjacent_node = None; @@ -299,16 +319,18 @@ where doc_nodes.push(capture.node); } - if let Some(name) = self.config.capture_map.get(&capture.index) { + if let Some(named_capture) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - kind = if name.starts_with("definition.") { - is_definition = true; - name.trim_start_matches("definition.") - } else if name.starts_with("reference.") { - name.trim_start_matches("reference.") - } else { - name - } + syntax_type = named_capture.syntax_type; + is_definition = named_capture.is_definition; + // kind = if 
name.starts_with("definition.") { + // is_definition = true; + // name.trim_start_matches("definition.") + // } else if name.starts_with("reference.") { + // name.trim_start_matches("reference.") + // } else { + // name + // } } } @@ -394,7 +416,7 @@ where range, name_range, docs, - kind: kind.to_string(), + syntax_type, is_definition, }; } @@ -408,7 +430,7 @@ where range, name_range, docs, - kind: kind.to_string(), + syntax_type, is_definition, }, mat.pattern_index, @@ -427,6 +449,44 @@ where } } +impl NamedCapture { + pub fn new(name: &String) -> Option { + let mut is_definition = false; + + let kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + name + }; + + let syntax_type = match kind.as_ref() { + "function" => {is_definition = true; SyntaxType::Function}, + "method" => {is_definition = true; SyntaxType::Method}, + "class" => SyntaxType::Class, + "module" => SyntaxType::Module, + "call" => SyntaxType::Call, + "type" => SyntaxType::Type, + "interface" => SyntaxType::Interface, + "implementation" => SyntaxType::Implementation, + _ => return None, + }; + + return Some(NamedCapture{ + syntax_type, + is_definition + }) + } +} + +impl fmt::Display for SyntaxType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 80f5c522594de99d487aa12a756f369ae48372a3 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 17:19:35 -0700 Subject: [PATCH 05/71] Tests compile --- cli/src/tests/tags_test.rs | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index fad8ebd8..b6283507 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -3,7 +3,7 @@ use 
super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, SyntaxType, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( @@ -99,12 +99,12 @@ fn test_tags_python() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.syntax_type)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("age", TagKind::Function), - ("compute_age", TagKind::Call), + ("Customer", SyntaxType::Class), + ("age", SyntaxType::Function), + ("compute_age", SyntaxType::Call), ] ); @@ -150,12 +150,12 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.syntax_type)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ("Customer", SyntaxType::Class), + ("getAge", SyntaxType::Method), + ("Agent", SyntaxType::Class) ] ); assert_eq!( @@ -204,18 +204,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.kind, + t.syntax_type, (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), - ("bar", TagKind::Call, (7, 4)), - ("a", TagKind::Call, (7, 8)), - ("b", TagKind::Call, (7, 11)), - ("each", TagKind::Call, (9, 14)), - ("baz", TagKind::Call, (13, 8)), - ("b", TagKind::Call, (13, 15),), + ("foo", SyntaxType::Method, (2, 0)), + ("bar", SyntaxType::Call, (7, 4)), + ("a", SyntaxType::Call, (7, 8)), + ("b", SyntaxType::Call, (7, 11)), + ("each", SyntaxType::Call, (9, 14)), + ("baz", SyntaxType::Call, (13, 8)), + ("b", SyntaxType::Call, (13, 15),), ] ); } @@ -319,7 +319,7 @@ fn test_tags_via_c_api() { assert_eq!( tags.iter() .map(|tag| ( - tag.kind, + tag.syntax_type, 
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], @@ -327,18 +327,18 @@ fn test_tags_via_c_api() { .collect::>(), &[ ( - c::TSTagKind::Function, + c::TSSyntaxType::Function, "b", "function b() {", "one\ntwo\nthree" ), ( - c::TSTagKind::Class, + c::TSSyntaxType::Class, "C", "class C extends D {", "four\nfive" ), - (c::TSTagKind::Call, "b", "b(a);", "") + (c::TSSyntaxType::Call, "b", "b(a);", "") ] ); From 929bb40adcb3678b3a229a272222bd3edab62ecf Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:34:55 -0700 Subject: [PATCH 06/71] Shorten to def/ref --- cli/src/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 06f4f4fa..4869b8cc 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -45,7 +45,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> " {:<8} {:<40}\t [{}] {:>9}-{:<9}", tag.syntax_type, str::from_utf8(&source[tag.name_range]).unwrap_or(""), - if tag.is_definition { "definition" } else { "reference" }, + if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, )?; From c08333e0cdbf0cb47253abe1eb856f3f80e4a9ea Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:35:07 -0700 Subject: [PATCH 07/71] Defer to debug formatting take 2 --- tags/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index e6179b8b..dd74f833 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -483,7 +483,7 @@ impl NamedCapture { impl fmt::Display for SyntaxType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) + format!("{:?}", self).fmt(f) } } From 3e8bf9daceb19c64cf3e84530d62594729000d1a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:35:16 -0700 Subject: [PATCH 
08/71] These are always definitions --- tags/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index dd74f833..991d3cb5 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -465,8 +465,8 @@ impl NamedCapture { let syntax_type = match kind.as_ref() { "function" => {is_definition = true; SyntaxType::Function}, "method" => {is_definition = true; SyntaxType::Method}, - "class" => SyntaxType::Class, - "module" => SyntaxType::Module, + "class" => {is_definition = true; SyntaxType::Class}, + "module" => {is_definition = true; SyntaxType::Module}, "call" => SyntaxType::Call, "type" => SyntaxType::Type, "interface" => SyntaxType::Interface, From 30132c682b22b57d7f42883f2cb8480691182551 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 14:12:14 -0700 Subject: [PATCH 09/71] Bring tags.h inline --- tags/include/tree_sitter/tags.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 946dc6f1..e1ed68bd 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -19,15 +19,17 @@ typedef enum { } TSTagsError; typedef enum { - TSTagKindFunction, - TSTagKindMethod, - TSTagKindClass, - TSTagKindModule, - TSTagKindCall, -} TSTagKind; + TSSyntaxTypeFunction, + TSSyntaxTypeMethod, + TSSyntaxTypeClass, + TSSyntaxTypeModule, + TSSyntaxTypeCall, + TSSyntaxTypeType, + TSSyntaxTypeInterface, + TSSyntaxTypeImplementation, +} TSTagSyntaxType; typedef struct { - TSTagKind kind; uint32_t start_byte; uint32_t end_byte; uint32_t name_start_byte; @@ -38,6 +40,8 @@ typedef struct { TSPoint end_point; uint32_t docs_start_byte; uint32_t docs_end_byte; + TSTagSyntaxType syntax_type; + bool is_definition; } TSTag; typedef struct TSTagger TSTagger; From 15202d0b382a083ffa7d3019eec9348c5c35c7d9 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:11:31 -0700 Subject: [PATCH 
10/71] Remove commented code --- tags/src/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 991d3cb5..8cd73457 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -323,14 +323,6 @@ where tag_node = Some(capture.node); syntax_type = named_capture.syntax_type; is_definition = named_capture.is_definition; - // kind = if name.starts_with("definition.") { - // is_definition = true; - // name.trim_start_matches("definition.") - // } else if name.starts_with("reference.") { - // name.trim_start_matches("reference.") - // } else { - // name - // } } } From 3c39b016a4c538d645a7e0f5bdfd476e4588afd9 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:11:42 -0700 Subject: [PATCH 11/71] Trim whitespace from tag source lines --- tags/src/lib.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8cd73457..32eaa0d9 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -495,7 +495,16 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); let max_line_len = max_line_len.min(text.len() - start); let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - start..end + trim_start(text, start..end) +} + +fn trim_start(text: &[u8], r: Range) -> Range { + for (index, c) in text[r.start..r.end].iter().enumerate() { + if !c.is_ascii_whitespace(){ + return index..r.end + } + } + return r } #[cfg(test)] @@ -514,4 +523,13 @@ mod tests { assert_eq!(line_range(text, 5, 10), 4..8); assert_eq!(line_range(text, 11, 10), 9..14); } + + #[test] + fn test_get_line_trims() { + let text = b" foo\nbar\n"; + assert_eq!(line_range(text, 0, 10), 3..6); + + let text = b"\t func foo\nbar\n"; + assert_eq!(line_range(text, 0, 10), 2..10); + } } From 7b2514a6108593f9da31b4bb6638a145bfa77b51 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 
17 Jun 2020 15:12:16 -0700 Subject: [PATCH 12/71] Whitespace --- tags/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 32eaa0d9..d0746b3d 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -28,7 +28,6 @@ pub struct TagsConfiguration { pattern_info: Vec, } - #[derive(Debug)] pub struct NamedCapture { pub syntax_type: SyntaxType, From 819b800cf973418c7dbd73e628ae26401d618580 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:54:29 -0700 Subject: [PATCH 13/71] Pick up the proper initial index and test --- tags/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index d0746b3d..d57e3fb5 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -500,7 +500,7 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { fn trim_start(text: &[u8], r: Range) -> Range { for (index, c) in text[r.start..r.end].iter().enumerate() { if !c.is_ascii_whitespace(){ - return index..r.end + return (r.start+index)..r.end } } return r @@ -530,5 +530,6 @@ mod tests { let text = b"\t func foo\nbar\n"; assert_eq!(line_range(text, 0, 10), 2..10); + assert_eq!(line_range(text, 11, 10), 11..14); } } From f24a952cb48706cf3134ad8da505462098b65348 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:54:36 -0700 Subject: [PATCH 14/71] Minor output changes --- cli/src/tags.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 4869b8cc..3493f616 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -34,20 +34,27 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? 
{ - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + let ident = if paths.len() > 1 { + let path_str = format!("{:?}", path); + writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + "\t" + } else { + "" + }; let source = fs::read(path)?; for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? { let tag = tag?; write!( &mut stdout, - " {:<8} {:<40}\t [{}] {:>9}-{:<9}", - tag.syntax_type, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + ident, str::from_utf8(&source[tag.name_range]).unwrap_or(""), + tag.syntax_type, if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), )?; if let Some(docs) = tag.docs { if docs.len() > 120 { From 016ad53a2f4f5a79ef4164eaf57a13e5147eb53a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 07:40:48 -0700 Subject: [PATCH 15/71] Trim end of lines as well --- tags/src/lib.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index d57e3fb5..1959c753 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -494,18 +494,27 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); let max_line_len = max_line_len.min(text.len() - start); let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - trim_start(text, start..end) + trim_end(text, trim_start(text, start..end)) } fn trim_start(text: &[u8], r: Range) -> Range { for (index, c) in text[r.start..r.end].iter().enumerate() { - if !c.is_ascii_whitespace(){ + if !c.is_ascii_whitespace() { return (r.start+index)..r.end } } return r } +fn trim_end(text: &[u8], r: Range) -> Range { + for (index, c) in text[r.start..r.end].iter().rev().enumerate() { + if !c.is_ascii_whitespace() { + return r.start..(r.end-index) + } + } + 
return r +} + #[cfg(test)] mod tests { use super::*; @@ -528,8 +537,15 @@ mod tests { let text = b" foo\nbar\n"; assert_eq!(line_range(text, 0, 10), 3..6); - let text = b"\t func foo\nbar\n"; + let text = b"\t func foo \nbar\n"; assert_eq!(line_range(text, 0, 10), 2..10); - assert_eq!(line_range(text, 11, 10), 11..14); + + let r = line_range(text, 0, 14); + assert_eq!(r, 2..10); + assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo"); + + let r = line_range(text, 12, 14); + assert_eq!(r, 12..15); + assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar"); } } From 3bcb1f8c9405f77242a0c2f46dabfe4c8e59b53d Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 10:48:33 -0700 Subject: [PATCH 16/71] Assert line trimming --- cli/src/tests/tags_test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index b6283507..02d06ff6 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -108,10 +108,10 @@ fn test_tags_python() { ] ); - assert_eq!(substr(source, &tags[0].line_range), " class Customer:"); + assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); assert_eq!( substr(source, &tags[1].line_range), - " def age(self):" + "def age(self):" ); assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); From 54586c4e5bf5536bf075558b0529f4518f348676 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:42:30 -0700 Subject: [PATCH 17/71] Named captures are dynamic New c api for getting list of syntax_type names. 
--- tags/include/tree_sitter/tags.h | 16 ++--- tags/src/c_lib.rs | 46 +++++++------- tags/src/lib.rs | 105 +++++++++++++------------------- 3 files changed, 69 insertions(+), 98 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index e1ed68bd..f6113a0f 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -18,17 +18,6 @@ typedef enum { TSTagsInvalidQuery, } TSTagsError; -typedef enum { - TSSyntaxTypeFunction, - TSSyntaxTypeMethod, - TSSyntaxTypeClass, - TSSyntaxTypeModule, - TSSyntaxTypeCall, - TSSyntaxTypeType, - TSSyntaxTypeInterface, - TSSyntaxTypeImplementation, -} TSTagSyntaxType; - typedef struct { uint32_t start_byte; uint32_t end_byte; @@ -40,7 +29,7 @@ typedef struct { TSPoint end_point; uint32_t docs_start_byte; uint32_t docs_end_byte; - TSTagSyntaxType syntax_type; + uint32_t syntax_type_id; bool is_definition; } TSTag; @@ -93,6 +82,9 @@ uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *); const char *ts_tags_buffer_docs(const TSTagsBuffer *); uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); +// Get the syntax kinds for a scope. 
+const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); + #ifdef __cplusplus } #endif diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 72c708d0..6dc48195 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, SyntaxType, TagsConfiguration, TagsContext}; +use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,19 +19,6 @@ pub enum TSTagsError { Unknown, } -#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TSSyntaxType { - Function, - Method, - Class, - Module, - Call, - Type, - Interface, - Implementation, -} - #[repr(C)] pub struct TSPoint { row: u32, @@ -50,7 +37,7 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, - pub syntax_type: TSSyntaxType, + pub syntax_type_id: u32, pub is_definition: bool, } @@ -173,16 +160,7 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, - syntax_type: match tag.syntax_type { - SyntaxType::Function => TSSyntaxType::Function, - SyntaxType::Method => TSSyntaxType::Method, - SyntaxType::Class => TSSyntaxType::Class, - SyntaxType::Module => TSSyntaxType::Module, - SyntaxType::Call => TSSyntaxType::Call, - SyntaxType::Type => TSSyntaxType::Type, - SyntaxType::Interface => TSSyntaxType::Interface, - SyntaxType::Implementation => TSSyntaxType::Implementation, - }, + syntax_type_id: tag.syntax_type_id, is_definition: tag.is_definition, }); } @@ -231,6 +209,24 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { buffer.docs.len() as u32 } +#[no_mangle] +pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( + this: *mut TSTagger, + scope_name: *const i8, + len: *mut u32, +) -> *const *const i8 { + let tagger = unwrap_mut_ptr(this); + let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; + let 
len = unwrap_mut_ptr(len); + + *len = 0; + if let Some(config) = tagger.languages.get(scope_name) { + *len = config.c_syntax_type_names.len() as u32; + return config.c_syntax_type_names.as_ptr() as *const *const i8 + } + std::ptr::null() +} + fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { unsafe { result.as_ref() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 1959c753..3d5ce770 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,6 +5,7 @@ use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{fmt, mem, str}; +use std::ffi::CStr; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -19,6 +20,8 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, + syntax_type_names: Vec>, + c_syntax_type_names: Vec<*const u8>, capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, @@ -30,24 +33,10 @@ pub struct TagsConfiguration { #[derive(Debug)] pub struct NamedCapture { - pub syntax_type: SyntaxType, + pub syntax_type_id: u32, pub is_definition: bool, } -// Should stay in sync with list of valid syntax types in semantic. 
-// See: https://github.com/github/semantic/blob/621696f5bc523a651f1cf9fc2ac58c557ea02d07/proto/semantic.proto#L165-L174 -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum SyntaxType { - Function, - Method, - Class, - Module, - Call, - Type, - Interface, - Implementation, -} - pub struct TagsContext { parser: Parser, cursor: QueryCursor, @@ -61,7 +50,7 @@ pub struct Tag { pub span: Range, pub docs: Option, pub is_definition: bool, - pub syntax_type: SyntaxType, + pub syntax_type_id: u32, } #[derive(Debug, PartialEq)] @@ -70,6 +59,7 @@ pub enum Error { Regex(regex::Error), Cancelled, InvalidLanguage, + InvalidCapture(String), } #[derive(Debug, Default)] @@ -120,11 +110,13 @@ impl TagsConfiguration { } } - let mut capture_map: HashMap = HashMap::new(); + let mut capture_map = HashMap::new(); + let mut syntax_type_names = Vec::new(); let mut doc_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; + let mut syntax_type_id = 0; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, @@ -132,12 +124,32 @@ impl TagsConfiguration { "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - _ => if let Some(nc) = NamedCapture::new(name) { - capture_map.insert(i as u32, nc); + "local.reference" => continue, + _ => { + let mut is_definition = false; + + let kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + return Err(Error::InvalidCapture(name.to_string())) + }.to_string()+"\0"; + + capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); + syntax_type_id+=1; + if let Ok(cstr) = CStr::from_bytes_with_nul(kind.as_bytes()) { + 
syntax_type_names.push(cstr.to_bytes_with_nul().to_vec().into_boxed_slice()); + } } } } + let c_syntax_type_names = syntax_type_names.iter().map( |s| { + s.as_ptr() + }).collect(); + let pattern_info = (0..query.pattern_count()) .map(|pattern_index| { let mut info = PatternInfo::default(); @@ -182,6 +194,8 @@ impl TagsConfiguration { Ok(TagsConfiguration { language, query, + syntax_type_names, + c_syntax_type_names, capture_map, doc_capture_index, name_capture_index, @@ -191,6 +205,13 @@ impl TagsConfiguration { pattern_info, }) } + + pub fn syntax_type_name(&self, id: u32) -> &str { + unsafe { + let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8).to_bytes(); + str::from_utf8(cstr).expect("syntax type name was not valid utf-8") + } + } } impl TagsContext { @@ -301,7 +322,7 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut syntax_type = SyntaxType::Function; + let mut syntax_type_id = 0; let mut is_definition = false; let mut docs_adjacent_node = None; @@ -320,7 +341,7 @@ where if let Some(named_capture) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - syntax_type = named_capture.syntax_type; + syntax_type_id = named_capture.syntax_type_id; is_definition = named_capture.is_definition; } } @@ -407,7 +428,7 @@ where range, name_range, docs, - syntax_type, + syntax_type_id, is_definition, }; } @@ -421,7 +442,7 @@ where range, name_range, docs, - syntax_type, + syntax_type_id, is_definition, }, mat.pattern_index, @@ -440,44 +461,6 @@ where } } -impl NamedCapture { - pub fn new(name: &String) -> Option { - let mut is_definition = false; - - let kind = if name.starts_with("definition.") { - is_definition = true; - name.trim_start_matches("definition.") - } else if name.starts_with("reference.") { - name.trim_start_matches("reference.") - } else { - name - }; - - let syntax_type = match kind.as_ref() { - "function" => {is_definition = true; 
SyntaxType::Function}, - "method" => {is_definition = true; SyntaxType::Method}, - "class" => {is_definition = true; SyntaxType::Class}, - "module" => {is_definition = true; SyntaxType::Module}, - "call" => SyntaxType::Call, - "type" => SyntaxType::Type, - "interface" => SyntaxType::Interface, - "implementation" => SyntaxType::Implementation, - _ => return None, - }; - - return Some(NamedCapture{ - syntax_type, - is_definition - }) - } -} - -impl fmt::Display for SyntaxType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - format!("{:?}", self).fmt(f) - } -} - impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 75724698f0b668b6511b8dcf4bf718733abfffb5 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:42:41 -0700 Subject: [PATCH 18/71] Fix up tests --- cli/src/tests/tags_test.rs | 60 +++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 02d06ff6..cc339e0a 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -2,8 +2,9 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use std::ffi::CStr; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, SyntaxType, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( @@ -97,14 +98,15 @@ fn test_tags_python() { .collect::, _>>() .unwrap(); + assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.syntax_type)) + .map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id))) .collect::>(), &[ - ("Customer", SyntaxType::Class), - ("age", SyntaxType::Function), - ("compute_age", SyntaxType::Call), + ("Customer", "class"), + ("age", "function"), + ("compute_age", "call"), ] ); @@ -150,12 
+152,12 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.syntax_type)) + .map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id))) .collect::>(), &[ - ("Customer", SyntaxType::Class), - ("getAge", SyntaxType::Method), - ("Agent", SyntaxType::Class) + ("Customer", "class"), + ("getAge", "method"), + ("Agent", "class") ] ); assert_eq!( @@ -204,18 +206,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.syntax_type, + tags_config.syntax_type_name(t.syntax_type_id), (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", SyntaxType::Method, (2, 0)), - ("bar", SyntaxType::Call, (7, 4)), - ("a", SyntaxType::Call, (7, 8)), - ("b", SyntaxType::Call, (7, 11)), - ("each", SyntaxType::Call, (9, 14)), - ("baz", SyntaxType::Call, (13, 8)), - ("b", SyntaxType::Call, (13, 15),), + ("foo", "method", (2, 0)), + ("bar", "call", (7, 4)), + ("a", "call", (7, 8)), + ("b", "call", (7, 11)), + ("each", "call", (9, 14)), + ("baz", "call", (13, 8)), + ("b", "call", (13, 15),), ] ); } @@ -253,6 +255,14 @@ fn test_tags_cancellation() { }); } +#[test] +fn test_invalid_cpature() { + let language = get_language("python"); + let e = TagsConfiguration::new(language, "(identifier) @method", "") + .expect_err("expected InvalidCapture error"); + assert_eq!(e, Error::InvalidCapture("method".to_string())); +} + #[test] fn test_tags_via_c_api() { allocations::record(|| { @@ -316,10 +326,18 @@ fn test_tags_via_c_api() { }) .unwrap(); + let syntax_types: Vec<&str> = unsafe { + let mut len: u32 = 0; + let ptr = c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); + slice::from_raw_parts(ptr, len as usize).iter().map(|i| { + CStr::from_ptr(*i).to_str().unwrap() + }).collect() + }; + assert_eq!( tags.iter() .map(|tag| ( - tag.syntax_type, + syntax_types[tag.syntax_type_id as usize], &source_code[tag.name_start_byte as 
usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], @@ -327,18 +345,18 @@ fn test_tags_via_c_api() { .collect::>(), &[ ( - c::TSSyntaxType::Function, + "function", "b", "function b() {", "one\ntwo\nthree" ), ( - c::TSSyntaxType::Class, + "class", "C", "class C extends D {", "four\nfive" ), - (c::TSSyntaxType::Call, "b", "b(a);", "") + ("call", "b", "b(a);", "") ] ); From b6ae67a6100a7c1fa6a249a2b4e0ff04378a41b5 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:43:10 -0700 Subject: [PATCH 19/71] Fix up CLI, use new syntax_type_name --- cli/src/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 3493f616..515f4c52 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -50,7 +50,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", ident, str::from_utf8(&source[tag.name_range]).unwrap_or(""), - tag.syntax_type, + &tags_config.syntax_type_name(tag.syntax_type_id), if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, From 17d26c0d5a5d2b836a0b5f77414c007572589b97 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:43:27 -0700 Subject: [PATCH 20/71] Improved errors --- cli/src/error.rs | 2 +- tags/src/lib.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 824bd92f..d583d1b9 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -83,7 +83,7 @@ impl<'a> From for Error { impl<'a> From for Error { fn from(error: tree_sitter_tags::Error) -> Self { - Error::new(format!("{:?}", error)) + Error::new(format!("{}", error)) } } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 3d5ce770..07fed3af 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -62,6 +62,15 @@ pub enum Error { 
InvalidCapture(String), } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), + _ => write!(f, "{:?}", self) + } + } +} + #[derive(Debug, Default)] struct PatternInfo { docs_adjacent_capture: Option, From ef15f4df24af34f685eefc630b2af69b1ee661b2 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 15:05:08 -0700 Subject: [PATCH 21/71] Dedupe items in syntax_type_names --- tags/src/lib.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 07fed3af..128a01cf 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,7 +5,7 @@ use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{fmt, mem, str}; -use std::ffi::CStr; +use std::ffi::{CStr, CString}; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -125,7 +125,6 @@ impl TagsConfiguration { let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; - let mut syntax_type_id = 0; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, @@ -144,12 +143,15 @@ impl TagsConfiguration { name.trim_start_matches("reference.") } else { return Err(Error::InvalidCapture(name.to_string())) - }.to_string()+"\0"; + }; - capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); - syntax_type_id+=1; - if let Ok(cstr) = CStr::from_bytes_with_nul(kind.as_bytes()) { - syntax_type_names.push(cstr.to_bytes_with_nul().to_vec().into_boxed_slice()); + if let Ok(cstr) = CString::new(kind) { + let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice(); + let syntax_type_id = 
syntax_type_names.iter().position(|n| { n == &c_kind }).unwrap_or_else(|| { + syntax_type_names.push(c_kind); + syntax_type_names.len() - 1 + }) as u32; + capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); } } } From f166947abb3fa834463dfb21b0044d30b0617795 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 15:05:27 -0700 Subject: [PATCH 22/71] Test updates, definition/reference prefix is now required --- cli/src/tests/tags_test.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index cc339e0a..540e2b01 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -10,65 +10,65 @@ const PYTHON_TAG_QUERY: &'static str = r#" ( (function_definition name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function + body: (block . (expression_statement (string) @doc))) @definition.function (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (function_definition - name: (identifier) @name) @function + name: (identifier) @name) @definition.function ( (class_definition name: (identifier) @name body: (block - . (expression_statement (string) @doc))) @class + . (expression_statement (string) @doc))) @definition.class (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (class_definition - name: (identifier) @name) @class + name: (identifier) @name) @definition.class (call - function: (identifier) @name) @call + function: (identifier) @name) @reference.call "#; const JS_TAG_QUERY: &'static str = r#" ( (comment)* @doc . (class_declaration - name: (identifier) @name) @class - (#select-adjacent! @doc @class) + name: (identifier) @name) @definition.class + (#select-adjacent! @doc @definition.class) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) ( (comment)* @doc . (method_definition - name: (property_identifier) @name) @method - (#select-adjacent! 
@doc @method) + name: (property_identifier) @name) @definition.method + (#select-adjacent! @doc @definition.method) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) ( (comment)* @doc . (function_declaration - name: (identifier) @name) @function - (#select-adjacent! @doc @function) + name: (identifier) @name) @definition.function + (#select-adjacent! @doc @definition.function) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) (call_expression - function: (identifier) @name) @call + function: (identifier) @name) @reference.call "#; const RUBY_TAG_QUERY: &'static str = r#" (method - name: (identifier) @name) @method + name: (identifier) @name) @definition.method (method_call - method: (identifier) @name) @call + method: (identifier) @name) @reference.call -((identifier) @name @call +((identifier) @name @reference.call (#is-not? local)) "#; @@ -256,7 +256,7 @@ fn test_tags_cancellation() { } #[test] -fn test_invalid_cpature() { +fn test_invalid_capture() { let language = get_language("python"); let e = TagsConfiguration::new(language, "(identifier) @method", "") .expect_err("expected InvalidCapture error"); From d9d3da994218339e525925b6cfda81247a22c001 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 16:04:05 -0700 Subject: [PATCH 23/71] Fill out rest of c errors --- tags/include/tree_sitter/tags.h | 1 + tags/src/c_lib.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index f6113a0f..58f5bbd9 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -16,6 +16,7 @@ typedef enum { TSTagsInvalidUtf8, TSTagsInvalidRegex, TSTagsInvalidQuery, + TSTagsInvalidCapture, } TSTagsError; typedef struct { diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 6dc48195..77f8aae5 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -16,6 +16,7 @@ pub enum TSTagsError { InvalidUtf8, InvalidRegex, InvalidQuery, + InvalidCapture, Unknown, } @@ -93,7 
+94,9 @@ pub extern "C" fn ts_tagger_add_language( } Err(Error::Query(_)) => TSTagsError::InvalidQuery, Err(Error::Regex(_)) => TSTagsError::InvalidRegex, - Err(_) => TSTagsError::Unknown, + Err(Error::Cancelled) => TSTagsError::Timeout, + Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage, + Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture, } } From 0438ed03ffbb4db86283ae3fcea3529971f1715b Mon Sep 17 00:00:00 2001 From: intrigus-lgtm <60750685+intrigus-lgtm@users.noreply.github.com> Date: Mon, 6 Jul 2020 22:47:10 +0200 Subject: [PATCH 24/71] Fix wrong file name (#666) "build_fuzzers" -> "build-fuzzers". It should be a hyphen and not an underscore. --- test/fuzz/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/fuzz/README.md b/test/fuzz/README.md index 649d2d89..a02d2689 100644 --- a/test/fuzz/README.md +++ b/test/fuzz/README.md @@ -22,10 +22,10 @@ The fuzzers can then be built with: export CLANG_DIR=$HOME/src/third_party/llvm-build/Release+Asserts/bin CC="$CLANG_DIR/clang" CXX="$CLANG_DIR/clang++" LINK="$CLANG_DIR/clang++" \ LIB_FUZZER_PATH=$HOME/src/compiler-rt/lib/fuzzer/libFuzzer.a \ - ./script/build_fuzzers + ./script/build-fuzzers ``` -This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build_fuzzers python ruby`. +This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build-fuzzers python ruby`.
The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments: ``` From 0bf2450b4aa26e79d9fcb1e2007e183ff14d2424 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 6 Jul 2020 15:56:21 -0700 Subject: [PATCH 25/71] Always enforce stack version limit during reductions Fixes #669 --- cli/src/tests/mod.rs | 1 + cli/src/tests/pathological_test.rs | 15 +++++++++++++++ lib/src/parser.c | 18 ++++++++++++------ 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 cli/src/tests/pathological_test.rs diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index ac54db00..24e8160e 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,6 +3,7 @@ mod helpers; mod highlight_test; mod node_test; mod parser_test; +mod pathological_test; mod query_test; mod tags_test; mod test_highlight_test; diff --git a/cli/src/tests/pathological_test.rs b/cli/src/tests/pathological_test.rs new file mode 100644 index 00000000..7ebd5439 --- /dev/null +++ b/cli/src/tests/pathological_test.rs @@ -0,0 +1,15 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_pathological_example_1() { + let language = "cpp"; + let source = r#"*ss(qqXstack); - uint32_t removed_version_count = 0; - StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + // Pop the given number of nodes from the given version of the parse stack. + // If stack versions have previously merged, then there may be more than one + // path back through the stack. For each path, create a new parent node to + // contain the popped children, and push it onto the stack in place of the + // children. 
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + uint32_t removed_version_count = 0; for (uint32_t i = 0; i < pop.size; i++) { StackSlice slice = pop.contents[i]; StackVersion slice_version = slice.version - removed_version_count; - // Error recovery can sometimes cause lots of stack versions to merge, - // such that a single pop operation can produce a lots of slices. - // Avoid creating too many stack versions in that situation. - if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { + // This is where new versions are added to the parse stack. The versions + // will all be sorted and truncated at the end of the outer parsing loop. + // Allow the maximum version count to be temporarily exceeded, but only + // by a limited threshold. + if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { ts_stack_remove_version(self->stack, slice_version); ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); removed_version_count++; From 86a5dabbcbdac650c53a889183bf56d7e721e09e Mon Sep 17 00:00:00 2001 From: Jacob Gillespie Date: Tue, 7 Jul 2020 16:45:23 +0100 Subject: [PATCH 26/71] Add TypeScript definition for DSL (#658) --- cli/npm/dsl.d.ts | 356 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 cli/npm/dsl.d.ts diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts new file mode 100644 index 00000000..b9bf1c98 --- /dev/null +++ b/cli/npm/dsl.d.ts @@ -0,0 +1,356 @@ +type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; +type BlankRule = {type: 'BLANK'}; +type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; +type FieldRule = {type: 'FIELD'; name: string; content: Rule}; +type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; +type PatternRule = {type: 'PATTERN'; value: string}; +type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; +type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: 
number}; +type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; +type PrecRule = {type: 'PREC'; content: Rule; value: number}; +type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; +type RepeatRule = {type: 'REPEAT'; content: Rule}; +type SeqRule = {type: 'SEQ'; members: Rule[]}; +type StringRule = {type: 'STRING'; value: string}; +type SymbolRule = {type: 'SYMBOL'; name: Name}; +type TokenRule = {type: 'TOKEN'; content: Rule}; + +type Rule = + | AliasRule + | BlankRule + | ChoiceRule + | FieldRule + | ImmediateTokenRule + | PatternRule + | PrecDynamicRule + | PrecLeftRule + | PrecRightRule + | PrecRule + | Repeat1Rule + | RepeatRule + | SeqRule + | StringRule + | SymbolRule + | TokenRule; + +type RuleOrLiteral = Rule | RegExp | string; + +type GrammarSymbols = { + [name in RuleName]: SymbolRule; +} & + Record>; + +type RuleBuilder = ( + $: GrammarSymbols, +) => RuleOrLiteral; + +type RuleBuilders< + RuleName extends string, + BaseGrammarRuleName extends string +> = { + [name in RuleName]: RuleBuilder; +}; + +interface Grammar< + RuleName extends string, + BaseGrammarRuleName extends string = never, + Rules extends RuleBuilders = RuleBuilders< + RuleName, + BaseGrammarRuleName + > +> { + /** + * Name of the grammar language. + */ + name: string; + + /** Mapping of grammar rule names to rule builder functions. */ + rules: Rules; + + /** + * An array of arrays of rule names. Each inner array represents a set of + * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ + * in the grammar. When these conflicts occur at runtime, Tree-sitter will + * use the GLR algorithm to explore all of the possible interpretations. If + * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree + * whose corresponding rule has the highest total _dynamic precedence_. 
+ * + * @param $ grammar rules + */ + conflicts?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[][]; + + /** + * An array of token names which can be returned by an _external scanner_. + * External scanners allow you to write custom C code which runs during the + * lexing process in order to handle lexical rules (e.g. Python's indentation + * tokens) that cannot be described by regular expressions. + * + * @param $ grammar rules + * @param previous array of externals from the base schema, if any + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + */ + externals?: ( + $: Record>, + previous: Rule[], + ) => SymbolRule[]; + + /** + * An array of tokens that may appear anywhere in the language. This + * is often used for whitespace and comments. The default value of + * extras is to accept whitespace. To control whitespace explicitly, + * specify extras: `$ => []` in your grammar. + * + * @param $ grammar rules + */ + extras?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * An array of rules that should be automatically removed from the + * grammar by replacing all of their usages with a copy of their definition. + * This is useful for rules that are used in multiple places but for which + * you don't want to create syntax tree nodes at runtime. + * + * @param $ grammar rules + */ + inline?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * A list of hidden rule names that should be considered supertypes in the + * generated node types file. + * + * @param $ grammar rules + * + * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + */ + supertypes?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * The name of a token that will match keywords for the purpose of the + * keyword extraction optimization. 
+ * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction + */ + word?: ($: GrammarSymbols) => RuleOrLiteral; +} + +type GrammarSchema = { + [K in keyof Grammar]: K extends 'rules' + ? Record + : Grammar[K]; +}; + +/** + * Causes the given rule to appear with an alternative name in the syntax tree. + * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an + * anonymous node, as if the rule had been written as the simple string. + * + * @param rule rule that will be aliased + * @param name target name for the alias + */ +declare function alias(rule: RuleOrLiteral, name: string): AliasRule; + +/** + * Causes the given rule to appear as an alternative named node, for instance + * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named + * node called `bar`. + * + * @param rule rule that will be aliased + * @param symbol target symbol for the alias + */ +declare function alias( + rule: RuleOrLiteral, + symbol: SymbolRule, +): AliasRule; + +/** + * Creates a blank rule, matching nothing. + */ +declare function blank(): BlankRule; + +/** + * Assigns a field name to the child node(s) matched by the given rule. + * In the resulting syntax tree, you can then use that field name to + * access specific children. + * + * @param name name of the field + * @param rule rule the field should match + */ +declare function field(name: string, rule: RuleOrLiteral): FieldRule; + +/** + * Creates a rule that matches one of a set of possible rules. The order + * of the arguments does not matter. This is analogous to the `|` (pipe) + * operator in EBNF notation. + * + * @param options possible rule choices + */ +declare function choice(...options: RuleOrLiteral[]): ChoiceRule; + +/** + * Creates a rule that matches zero or one occurrence of a given rule. + * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. 
+ * + * @param value rule to be made optional + */ +declare function optional(rule: RuleOrLiteral): ChoiceRule; + +/** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at parser-generation time. When two rules overlap + * in a way that represents either a true ambiguity or a _local_ ambiguity + * given one token of lookahead, Tree-sitter will try to resolve the conflict by + * matching the rule with the higher precedence. The default precedence of all + * rules is zero. This works similarly to the precedence directives in Yacc grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ +declare const prec: { + (number: number, rule: RuleOrLiteral): PrecRule; + + /** + * Marks the given rule as left-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a left-associative rule, Tree-sitter + * will prefer matching a rule that ends _earlier_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as left-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + left(rule: RuleOrLiteral): PrecLeftRule; + left(number: number, rule: RuleOrLiteral): PrecLeftRule; + + /** + * Marks the given rule as right-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a right-associative rule, Tree-sitter + * will prefer matching a rule that ends _later_. 
This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as right-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + right(rule: RuleOrLiteral): PrecRightRule; + right(number: number, rule: RuleOrLiteral): PrecRightRule; + + /** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. + * This is only necessary when handling a conflict dynamically using the + * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: + * multiple rules correctly match a given piece of code. In that event, + * Tree-sitter compares the total dynamic precedence associated with each + * rule, and selects the one with the highest total. This is similar to + * dynamic precedence directives in Bison grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html + */ + dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule; +}; + +/** + * Creates a rule that matches _zero-or-more_ occurrences of a given rule. + * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This + * rule is implemented in terms of `repeat1` but is included because it + * is very commonly used. + * + * @param rule rule to repeat, zero or more times + */ +declare function repeat(rule: RuleOrLiteral): RepeatRule; + +/** + * Creates a rule that matches one-or-more occurrences of a given rule. + * + * @param rule rule to repeat, one or more times + */ +declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; + +/** + * Creates a rule that matches any number of other rules, one after another. + * It is analogous to simply writing multiple symbols next to each other + * in EBNF notation. 
+ * + * @param rules ordered rules that comprise the sequence + */ +declare function seq(...rules: RuleOrLiteral[]): SeqRule; + +/** + * Creates a symbol rule, representing another rule in the grammar by name. + * + * @param name name of the target rule + */ +declare function sym(name: Name): SymbolRule; + +/** + * Marks the given rule as producing only a single token. Tree-sitter's + * default is to treat each String or RegExp literal in the grammar as a + * separate token. Each token is matched separately by the lexer and + * returned as its own leaf node in the tree. The token function allows + * you to express a complex rule using the DSL functions (rather + * than as a single regular expression) but still have Tree-sitter treat + * it as a single token. + * + * @param rule rule to represent as a single token + */ +declare const token: { + (rule: RuleOrLiteral): TokenRule; + + /** + * Marks the given rule as producing an immediate token. This allows + * the parser to produce a different token based on whether or not + * there are `extras` preceding the token's main content. When there + * are _no_ leading `extras`, an immediate token is preferred over a + * normal token which would otherwise match. + * + * @param rule rule to represent as an immediate token + */ + immediate(rule: RuleOrLiteral): ImmediateTokenRule; +}; + +/** + * Creates a new language grammar with the provided schema. + * + * @param options grammar options + */ +declare function grammar( + options: Grammar, +): GrammarSchema; + +/** + * Extends an existing language grammar with the provided options, + * creating a new language. 
+ * + * @param baseGrammar base grammar schema to extend from + * @param options grammar options for the new extended language + */ +declare function grammar< + BaseGrammarRuleName extends string, + RuleName extends string +>( + baseGrammar: GrammarSchema, + options: Grammar, +): GrammarSchema; From d614c14c2cfc5911674f233ba7073c3dc3a90fdd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 8 Jul 2020 12:36:59 -0700 Subject: [PATCH 27/71] tags: Make spans refer to name, not entire def/ref Co-authored-by: Tim Clem Co-authored-by: Beka Valentine --- cli/src/tests/tags_test.rs | 23 ++++++++++++++++++----- tags/src/lib.rs | 12 +++++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index fad8ebd8..f3df4b53 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -2,6 +2,7 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use tree_sitter::Point; use tree_sitter_tags::c_lib as c; use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; @@ -150,12 +151,24 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.span.clone(), t.kind)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ( + "Customer", + Point::new(5, 10)..Point::new(5, 18), + TagKind::Class + ), + ( + "getAge", + Point::new(9, 8)..Point::new(9, 14), + TagKind::Method + ), + ( + "Agent", + Point::new(15, 10)..Point::new(15, 15), + TagKind::Class + ) ] ); assert_eq!( @@ -209,7 +222,7 @@ fn test_tags_ruby() { )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), + ("foo", TagKind::Method, (2, 4)), ("bar", TagKind::Call, (7, 4)), ("a", TagKind::Call, (7, 8)), ("b", TagKind::Call, (7, 11)), diff --git a/tags/src/lib.rs 
b/tags/src/lib.rs index 8d1853bb..613e56ac 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -300,7 +300,7 @@ where continue; } - let mut name_range = None; + let mut name_node = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; let mut kind = TagKind::Call; @@ -314,7 +314,7 @@ where } if index == self.config.name_capture_index { - name_range = Some(capture.node.byte_range()); + name_node = Some(capture.node); } else if index == self.config.doc_capture_index { doc_nodes.push(capture.node); } else if index == self.config.call_capture_index { @@ -335,7 +335,9 @@ where } } - if let (Some(tag_node), Some(name_range)) = (tag_node, name_range) { + if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { + let name_range = name_node.byte_range(); + if pattern_info.name_must_be_non_local { let mut is_local = false; for scope in self.scopes.iter().rev() { @@ -413,7 +415,7 @@ where *pattern_index = mat.pattern_index; *tag = Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: tag_node.start_position()..tag_node.end_position(), + span: name_node.start_position()..name_node.end_position(), kind, range, name_range, @@ -426,7 +428,7 @@ where ( Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: tag_node.start_position()..tag_node.end_position(), + span: name_node.start_position()..name_node.end_position(), kind, range, name_range, From 255cf0a9cfe58654a40fd166dcbc3a0849073a22 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 8 Jul 2020 15:23:21 -0700 Subject: [PATCH 28/71] tags: Add utf16 column ranges to tags Also, ensure that line ranges contain only valid UTF8. 
Co-authored-by: Tim Clem Co-authored-by: Beka Valentine --- cli/src/tests/tags_test.rs | 42 +++++++--- tags/src/lib.rs | 159 +++++++++++++++++++++++++++---------- 2 files changed, 148 insertions(+), 53 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index f3df4b53..c81f6966 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -8,21 +8,21 @@ use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( - (function_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (function_definition name: (identifier) @name) @function ( - (class_definition - name: (identifier) @name - body: (block - . (expression_statement (string) @doc))) @class - (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @class + (#strip! 
@doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (class_definition @@ -30,6 +30,10 @@ const PYTHON_TAG_QUERY: &'static str = r#" (call function: (identifier) @name) @call + +(call + function: (attribute + attribute: (identifier) @name)) @call "#; const JS_TAG_QUERY: &'static str = r#" @@ -179,6 +183,26 @@ fn test_tags_javascript() { assert_eq!(tags[2].docs, None); } +#[test] +fn test_tags_columns_measured_in_utf16_code_units() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes(); + + let tag = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .next() + .unwrap() + .unwrap(); + + assert_eq!(substr(source, &tag.name_range), "hello_α_ω"); + assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); + assert_eq!(tag.utf16_column_range, 9..18); +} + #[test] fn test_tags_ruby() { let language = get_language("ruby"); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 613e56ac..a240666f 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -1,10 +1,10 @@ pub mod c_lib; -use memchr::{memchr, memrchr}; +use memchr::memchr; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{fmt, mem, str}; +use std::{char, fmt, mem, str}; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; @@ -43,6 +43,7 @@ pub struct Tag { pub name_range: Range, pub line_range: Range, pub span: Range, + pub utf16_column_range: Range, pub docs: Option, } @@ -404,39 +405,32 @@ where // Only create one tag per node. The tag queue is sorted by node position // to allow for fast lookup. 
let range = tag_node.byte_range(); - match self - .tag_queue - .binary_search_by_key(&(name_range.end, name_range.start), |(tag, _)| { - (tag.name_range.end, tag.name_range.start) - }) { + let span = name_node.start_position()..name_node.end_position(); + let utf16_column_range = + get_utf16_column_range(self.source, &name_range, &span); + let line_range = + line_range(self.source, name_range.start, span.start, MAX_LINE_LEN); + let tag = Tag { + line_range, + span, + utf16_column_range, + kind, + range, + name_range, + docs, + }; + match self.tag_queue.binary_search_by_key( + &(tag.name_range.end, tag.name_range.start), + |(tag, _)| (tag.name_range.end, tag.name_range.start), + ) { Ok(i) => { - let (tag, pattern_index) = &mut self.tag_queue[i]; + let (existing_tag, pattern_index) = &mut self.tag_queue[i]; if *pattern_index > mat.pattern_index { *pattern_index = mat.pattern_index; - *tag = Tag { - line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: name_node.start_position()..name_node.end_position(), - kind, - range, - name_range, - docs, - }; + *existing_tag = tag; } } - Err(i) => self.tag_queue.insert( - i, - ( - Tag { - line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: name_node.start_position()..name_node.end_position(), - kind, - range, - name_range, - docs, - }, - mat.pattern_index, - ), - ), + Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)), } } } @@ -475,11 +469,92 @@ impl From for Error { } } -fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { - let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); - let max_line_len = max_line_len.min(text.len() - start); - let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - start..end +pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + 
+impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + +fn line_range( + text: &[u8], + start_byte: usize, + start_point: Point, + max_line_len: usize, +) -> Range { + let line_start_byte = start_byte - start_point.column; + let max_line_len = max_line_len.min(text.len() - line_start_byte); + let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)]; + let len = if let Some(len) = memchr(b'\n', text_after_line_start) { + len + } else { + match str::from_utf8(text_after_line_start) { + Ok(s) => s.len(), + Err(e) => e.valid_up_to(), + } + }; + line_start_byte..(line_start_byte + len) +} + +fn get_utf16_column_range( + text: &[u8], + byte_range: &Range, + point_range: &Range, +) -> Range { + let start = byte_range.start - point_range.start.column; + let preceding_text_on_line = &text[start..byte_range.start]; + let start_col = utf16_len(preceding_text_on_line); + start_col..(start_col + utf16_len(&text[byte_range.clone()])) +} + +fn utf16_len(bytes: &[u8]) -> usize { + LossyUtf8::new(bytes) + .flat_map(|chunk| chunk.chars().map(char::len_utf16)) + .sum() } #[cfg(test)] @@ -488,14 +563,10 @@ mod tests { #[test] fn test_get_line() { - let text = b"abc\ndefg\nhijkl"; - assert_eq!(line_range(text, 0, 10), 0..3); - assert_eq!(line_range(text, 1, 10), 
0..3); - assert_eq!(line_range(text, 2, 10), 0..3); - assert_eq!(line_range(text, 3, 10), 0..3); - assert_eq!(line_range(text, 1, 2), 0..2); - assert_eq!(line_range(text, 4, 10), 4..8); - assert_eq!(line_range(text, 5, 10), 4..8); - assert_eq!(line_range(text, 11, 10), 9..14); + let text = "abc\ndefg❤hij\nklmno".as_bytes(); + assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14); + assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8); + assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20); + assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19); } } From e9ea8192a3428a9a204167c27e7d0a76cbd4efd8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 09:11:34 -0700 Subject: [PATCH 29/71] Mention node version >= 6 in docs Fixes #677 --- docs/section-3-creating-parsers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index c877ba6f..b075e488 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -13,7 +13,7 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo In order to develop a Tree-sitter parser, there are two dependencies that you need to install: -* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. It shouldn't matter what version of Node you have. +* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater. * **A C Compiler** - Tree-sitter creates parsers that are written in C. 
In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform. ### Installation From b52f28d6d5d740a85e539cde221b6742106f488f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 11:28:07 -0700 Subject: [PATCH 30/71] Allow measuring time for tags subcommand --- cli/src/main.rs | 20 ++++++++++++--- cli/src/tags.rs | 67 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 757c70eb..d7a5e7b1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -97,6 +97,8 @@ fn run() -> error::Result<()> { .value_name("json|protobuf") .help("Determine output format (default: json)"), ) + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg( Arg::with_name("inputs") @@ -149,8 +151,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser") - .arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") @@ -268,7 +276,13 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; let paths = collect_paths(matches.values_of("inputs").unwrap())?; - tags::generate_tags(&loader, matches.value_of("scope"), &paths)?; + tags::generate_tags( + &loader, + matches.value_of("scope"), + &paths, + 
matches.is_present("quiet"), + matches.is_present("time"), + )?; } else if let Some(matches) = matches.subcommand_matches("highlight") { loader.configure_highlights(&config.theme.highlight_names); loader.find_all_languages(&config.parser_directories)?; diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 515f4c52..5ea00f39 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -3,10 +3,17 @@ use super::util; use crate::error::{Error, Result}; use std::io::{self, Write}; use std::path::Path; +use std::time::Instant; use std::{fs, str}; use tree_sitter_tags::TagsContext; -pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> { +pub fn generate_tags( + loader: &Loader, + scope: Option<&str>, + paths: &[String], + quiet: bool, + time: bool, +) -> Result<()> { let mut lang = None; if let Some(scope) = scope { lang = loader.language_configuration_for_scope(scope)?; @@ -34,36 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? { - let ident = if paths.len() > 1 { - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; - "\t" + let indent; + if paths.len() > 1 { + if !quiet { + writeln!(&mut stdout, "{}", path.to_string_lossy())?; + } + indent = "\t" } else { - "" + indent = ""; }; let source = fs::read(path)?; + let t0 = Instant::now(); for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? 
{ let tag = tag?; - write!( - &mut stdout, - "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", - ident, - str::from_utf8(&source[tag.name_range]).unwrap_or(""), - &tags_config.syntax_type_name(tag.syntax_type_id), - if tag.is_definition { "def" } else { "ref" }, - tag.span.start, - tag.span.end, - str::from_utf8(&source[tag.line_range]).unwrap_or(""), - )?; - if let Some(docs) = tag.docs { - if docs.len() > 120 { - write!(&mut stdout, "\t{:?}...", &docs[0..120])?; - } else { - write!(&mut stdout, "\t{:?}", &docs)?; + if !quiet { + write!( + &mut stdout, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + indent, + str::from_utf8(&source[tag.name_range]).unwrap_or(""), + &tags_config.syntax_type_name(tag.syntax_type_id), + if tag.is_definition { "def" } else { "ref" }, + tag.span.start, + tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), + )?; + if let Some(docs) = tag.docs { + if docs.len() > 120 { + write!(&mut stdout, "\t{:?}...", &docs[0..120])?; + } else { + write!(&mut stdout, "\t{:?}", &docs)?; + } } + writeln!(&mut stdout, "")?; } - writeln!(&mut stdout, "")?; + } + + if time { + writeln!( + &mut stdout, + "{}time: {}ms", + indent, + t0.elapsed().as_millis(), + )?; } } else { eprintln!("No tags config found for path {:?}", path); From 1ecfc2548f1dfe0aa2ec34fb174555a27f37dde0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 11:30:30 -0700 Subject: [PATCH 31/71] tags: Move impls below type definitions --- tags/src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 7d58d99b..790b866a 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -63,15 +63,6 @@ pub enum Error { InvalidCapture(String), } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. 
Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), - _ => write!(f, "{:?}", self) - } - } -} - #[derive(Debug, Default)] struct PatternInfo { docs_adjacent_capture: Option, @@ -475,6 +466,15 @@ where } } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), + _ => write!(f, "{:?}", self) + } + } +} + impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 52360b103d0b293c54e83a188d7f2f1b9a7dc5d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:07:57 -0700 Subject: [PATCH 32/71] tags: Fix comment position --- tags/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 790b866a..41b4557a 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -423,8 +423,6 @@ where } } - // Only create one tag per node. The tag queue is sorted by node position - // to allow for fast lookup. let range = tag_node.byte_range(); let span = name_node.start_position()..name_node.end_position(); let utf16_column_range = @@ -441,6 +439,9 @@ where is_definition, syntax_type_id, }; + + // Only create one tag per node. The tag queue is sorted by node position + // to allow for fast lookup. 
match self.tag_queue.binary_search_by_key( &(tag.name_range.end, tag.name_range.start), |(tag, _)| (tag.name_range.end, tag.name_range.start), From 0f805603104cab4d59c9f02154720fd000b22305 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:13:12 -0700 Subject: [PATCH 33/71] tags: Reuse work when computing utf16 columns, line ranges --- tags/src/lib.rs | 64 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 41b4557a..ca5699ca 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -91,6 +91,7 @@ where matches: I, _tree: Tree, source: &'a [u8], + prev_line_info: Option, config: &'a TagsConfiguration, cancellation_flag: Option<&'a AtomicUsize>, iter_count: usize, @@ -98,6 +99,13 @@ where scopes: Vec>, } +struct LineInfo { + utf8_position: Point, + utf8_byte: usize, + utf16_column: usize, + line_range: Range, +} + impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -260,6 +268,7 @@ impl TagsContext { source, config, cancellation_flag, + prev_line_info: None, tag_queue: Vec::new(), iter_count: 0, scopes: vec![LocalScope { @@ -425,10 +434,46 @@ where let range = tag_node.byte_range(); let span = name_node.start_position()..name_node.end_position(); - let utf16_column_range = - get_utf16_column_range(self.source, &name_range, &span); - let line_range = - line_range(self.source, name_range.start, span.start, MAX_LINE_LEN); + + // Compute tag properties that depend on the text of the containing line. If the + // previous tag occurred on the same line, then reuse results from the previous tag. 
+ let line_range; + let mut prev_utf16_column = 0; + let mut prev_utf8_byte = name_range.start - span.start.column; + let line_info = self.prev_line_info.as_ref().and_then(|info| { + if info.utf8_position.row == span.start.row { + Some(info) + } else { + None + } + }); + if let Some(line_info) = line_info { + line_range = line_info.line_range.clone(); + if line_info.utf8_position.column <= span.start.column { + prev_utf8_byte = line_info.utf8_byte; + prev_utf16_column = line_info.utf16_column; + } + } else { + line_range = self::line_range( + self.source, + name_range.start, + span.start, + MAX_LINE_LEN, + ); + } + + let utf16_start_column = prev_utf16_column + + utf16_len(&self.source[prev_utf8_byte..name_range.start]); + let utf16_end_column = + utf16_start_column + utf16_len(&self.source[name_range.clone()]); + let utf16_column_range = utf16_start_column..utf16_end_column; + + self.prev_line_info = Some(LineInfo { + utf8_position: span.end, + utf8_byte: name_range.end, + utf16_column: utf16_end_column, + line_range: line_range.clone(), + }); let tag = Tag { line_range, span, @@ -570,17 +615,6 @@ fn line_range( line_start_byte..line_end_byte } -fn get_utf16_column_range( - text: &[u8], - byte_range: &Range, - point_range: &Range, -) -> Range { - let line_start_byte = byte_range.start - point_range.start.column; - let preceding_text_on_line = &text[line_start_byte..byte_range.start]; - let start_col = utf16_len(preceding_text_on_line); - start_col..(start_col + utf16_len(&text[byte_range.clone()])) -} - fn utf16_len(bytes: &[u8]) -> usize { LossyUtf8::new(bytes) .flat_map(|chunk| chunk.chars().map(char::len_utf16)) From 9e38fd9f5c32b58919c1cb422f06c8021da98207 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:32:40 -0700 Subject: [PATCH 34/71] Add todo comment for LossyUtf8 iterator --- tags/src/lib.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index ca5699ca..dcbb9984 
100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -106,6 +106,11 @@ struct LineInfo { line_range: Range, } +struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -533,13 +538,11 @@ impl From for Error { } } -pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - +// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` +// is ever stabilized, we should use that. Otherwise, this struct could be moved +// into some module that's shared between `tree-sitter-tags` and `tree-sitter-highlight`. impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { + fn new(bytes: &'a [u8]) -> Self { LossyUtf8 { bytes, in_replacement: false, From 6cee04350f909c6611258ccaee06446e08218f0c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 13:39:47 -0700 Subject: [PATCH 35/71] tags: Expose utf16 column range to C API --- tags/include/tree_sitter/tags.h | 2 ++ tags/src/c_lib.rs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 58f5bbd9..f2b17075 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -28,6 +28,8 @@ typedef struct { uint32_t line_end_byte; TSPoint start_point; TSPoint end_point; + uint32_t utf16_start_column; + uint32_t utf16_end_column; uint32_t docs_start_byte; uint32_t docs_end_byte; uint32_t syntax_type_id; diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 77f8aae5..07e1e19a 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -36,6 +36,8 @@ pub struct TSTag { pub line_end_byte: u32, pub start_point: TSPoint, pub end_point: TSPoint, + pub utf16_start_colum: u32, + pub utf16_end_colum: u32, pub docs_start_byte: u32, pub docs_end_byte: u32, pub syntax_type_id: u32, @@ 
-161,6 +163,8 @@ pub extern "C" fn ts_tagger_tag( row: tag.span.end.row as u32, column: tag.span.end.column as u32, }, + utf16_start_colum: tag.utf16_column_range.start as u32, + utf16_end_colum: tag.utf16_column_range.end as u32, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, syntax_type_id: tag.syntax_type_id, @@ -225,7 +229,7 @@ pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( *len = 0; if let Some(config) = tagger.languages.get(scope_name) { *len = config.c_syntax_type_names.len() as u32; - return config.c_syntax_type_names.as_ptr() as *const *const i8 + return config.c_syntax_type_names.as_ptr() as *const *const i8; } std::ptr::null() } From 0bfd47e2e5631af43ddf30abdac2043051bbe8af Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 10 Jul 2020 10:12:46 -0700 Subject: [PATCH 36/71] Improve error message when failing to run graphviz stuff Fixes #682 --- cli/src/util.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cli/src/util.rs b/cli/src/util.rs index 8978ecc1..9f941f62 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,3 +1,4 @@ +use super::error::{Error, Result}; use std::io; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -31,12 +32,12 @@ pub struct LogSession(); pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { Ok(LogSession()) } #[cfg(unix)] -pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; @@ -46,11 +47,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result Date: Fri, 10 Jul 2020 13:33:04 -0700 Subject: [PATCH 37/71] highlight: Avoid accidentally treating locals patterns as highlight patterns --- highlight/src/lib.rs | 163 
++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 88 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index d2e27b46..bb110219 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -620,7 +620,7 @@ where type Item = Result; fn next(&mut self) -> Option { - loop { + 'main: loop { // If we've already determined the next highlight boundary, just return it. if let Some(e) = self.next_event.take() { return Some(Ok(e)); @@ -640,29 +640,34 @@ where // If none of the layers have any more highlight boundaries, terminate. if self.layers.is_empty() { - if self.byte_offset < self.source.len() { + return if self.byte_offset < self.source.len() { let result = Some(Ok(HighlightEvent::Source { start: self.byte_offset, end: self.source.len(), })); self.byte_offset = self.source.len(); - return result; + result } else { - return None; - } + None + }; } // Get the next capture from whichever layer has the earliest highlight boundary. - let match_; - let mut captures; - let mut capture; - let mut pattern_index; + let range; let layer = &mut self.layers[0]; - if let Some((m, capture_index)) = layer.captures.peek() { - match_ = m; - captures = match_.captures; - pattern_index = match_.pattern_index; - capture = captures[*capture_index]; + if let Some((next_match, capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*capture_index]; + range = next_capture.node.byte_range(); + + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. + if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } } // If there are no more captures, then emit any remaining highlight end events. 
// And if there are none of those, then just advance to the end of the document. @@ -673,30 +678,17 @@ where return self.emit_event(self.source.len(), None); }; - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - let range = capture.node.byte_range(); - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } + let (mut match_, capture_index) = layer.captures.next().unwrap(); + let mut capture = match_.captures[capture_index]; // If this capture represents an injection, then process the injection. - if pattern_index < layer.config.locals_pattern_index { + if match_.pattern_index < layer.config.locals_pattern_index { let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, match_, &self.source); + injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. The `unwrap` is ok because - layer.captures.next().unwrap().0.remove(); + // in the stream of captures. + match_.remove(); // If a language is found with the given name, then add a new language layer // to the highlighted document. @@ -729,16 +721,19 @@ where } self.sort_layers(); - continue; + continue 'main; } - layer.captures.next(); + // Remove from the local scope stack any local scopes that have already ended. 
+ while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } // If this capture is for tracking local variables, then process the // local variable info. let mut reference_highlight = None; let mut definition_highlight = None; - while pattern_index < layer.config.highlights_pattern_index { + while match_.pattern_index < layer.config.highlights_pattern_index { // If the node represents a local scope, push a new local scope onto // the scope stack. if Some(capture.index) == layer.config.local_scope_capture_index { @@ -748,7 +743,7 @@ where range: range.clone(), local_defs: Vec::new(), }; - for prop in layer.config.query.property_settings(pattern_index) { + for prop in layer.config.query.property_settings(match_.pattern_index) { match prop.key.as_ref() { "local.scope-inherits" => { scope.inherits = @@ -767,7 +762,7 @@ where let scope = layer.scope_stack.last_mut().unwrap(); let mut value_range = 0..0; - for capture in captures { + for capture in match_.captures { if Some(capture.index) == layer.config.local_def_value_capture_index { value_range = capture.node.byte_range(); } @@ -810,84 +805,76 @@ where } } - // Continue processing any additional local-variable-tracking patterns - // for the same node. + // Continue processing any additional matches for the same node. if let Some((next_match, next_capture_index)) = layer.captures.peek() { let next_capture = next_match.captures[*next_capture_index]; if next_capture.node == capture.node { - pattern_index = next_match.pattern_index; - captures = next_match.captures; capture = next_capture; - layer.captures.next(); + match_ = layer.captures.next().unwrap().0; continue; - } else { - break; } } - break; + self.sort_layers(); + continue 'main; } // Otherwise, this capture must represent a highlight. - let mut has_highlight = true; - // If this exact range has already been highlighted by an earlier pattern, or by // a different layer, then skip over this one. 
if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { if range.start == last_start && range.end == last_end && layer.depth < last_depth { - has_highlight = false; + self.sort_layers(); + continue 'main; } } // If the current node was found to be a local variable, then skip over any // highlighting patterns that are disabled for local variables. - while has_highlight - && (definition_highlight.is_some() || reference_highlight.is_some()) - && layer.config.non_local_variable_patterns[pattern_index] - { - has_highlight = false; - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - has_highlight = true; - pattern_index = next_match.pattern_index; - layer.captures.next(); - continue; + if definition_highlight.is_some() || reference_highlight.is_some() { + while layer.config.non_local_variable_patterns[match_.pattern_index] { + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } } + + self.sort_layers(); + continue 'main; } - break; } - if has_highlight { - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = layer.captures.peek() { - if next_match.captures[*next_capture_index].node == capture.node { - layer.captures.next(); - } else { - break; - } + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. 
Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + layer.captures.next(); + } else { + break; } + } - let current_highlight = layer.config.highlight_indices[capture.index as usize]; + let current_highlight = layer.config.highlight_indices[capture.index as usize]; - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } + // Emit a scope start event and push the node's end position to the stack. 
+ if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); } self.sort_layers(); From e4e785b567eb975c5fa6900b08728aac856bdaad Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 10 Jul 2020 13:47:56 -0700 Subject: [PATCH 38/71] Remove unused flags from tags CLI command --- cli/src/main.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index d7a5e7b1..713bf28f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -90,13 +90,6 @@ fn run() -> error::Result<()> { ) .subcommand( SubCommand::with_name("tags") - .arg( - Arg::with_name("format") - .short("f") - .long("format") - .value_name("json|protobuf") - .help("Determine output format (default: json)"), - ) .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) @@ -106,12 +99,6 @@ fn run() -> error::Result<()> { .index(1) .required(true) .multiple(true), - ) - .arg( - Arg::with_name("v") - .short("v") - .multiple(true) - .help("Sets the level of verbosity"), ), ) .subcommand( From c2fb0f5229b1bb72005da5177457fafb1560954a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tu=E1=BA=A5n-Anh=20Nguy=E1=BB=85n?= Date: Sun, 12 Jul 2020 20:45:17 +0700 Subject: [PATCH 39/71] cli: Add --byte-range flag to query command --- cli/src/main.rs | 12 +++++++++++- cli/src/query.rs | 4 ++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 713bf28f..a543202d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -85,6 +85,12 @@ fn run() -> error::Result<()> { .multiple(true) .required(true), ) + .arg( + Arg::with_name("beg>: error::Result<()> { matches.value_of("scope"), )?; let query_path = 
Path::new(matches.value_of("query-path").unwrap()); - query::query_files_at_paths(language, paths, query_path, ordered_captures)?; + let range = matches.value_of("beg>: = br.split(":").collect(); + (r[0].parse().unwrap(), r[1].parse().unwrap()) + }); + query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?; } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; let paths = collect_paths(matches.values_of("inputs").unwrap())?; diff --git a/cli/src/query.rs b/cli/src/query.rs index 47242273..8d097911 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -9,6 +9,7 @@ pub fn query_files_at_paths( paths: Vec<&Path>, query_path: &Path, ordered_captures: bool, + range: Option<(usize, usize)>, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); @@ -20,6 +21,9 @@ pub fn query_files_at_paths( .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; let mut query_cursor = QueryCursor::new(); + if let Some((beg, end)) = range { + query_cursor.set_byte_range(beg, end); + } let mut parser = Parser::new(); parser.set_language(language).map_err(|e| e.to_string())?; From 91a715799e1b468c8303c7c612416c04f5a9c5fb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 14 Jul 2020 15:04:39 -0700 Subject: [PATCH 40/71] Accept a paths file to most CLI subcommands --- cli/src/main.rs | 111 +++++++++++++++++++++++++++-------------------- cli/src/query.rs | 6 +-- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index a543202d..0668d08d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -53,11 +53,12 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("parse") .about("Parse files") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) 
.arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) @@ -79,17 +80,18 @@ fn run() -> error::Result<()> { SubCommand::with_name("query") .about("Search files using a syntax tree query") .arg(Arg::with_name("query-path").index(1).required(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(2) .multiple(true) - .required(true), + .required(false), ) .arg( - Arg::with_name("beg>: error::Result<()> { .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("inputs") + Arg::with_name("paths") .help("The source file to use") .index(1) - .required(true) .multiple(true), ), ) @@ -122,11 +124,12 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("highlight") .about("Highlight a file") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) @@ -225,7 +228,9 @@ fn run() -> error::Result<()> { let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); - let paths = collect_paths(matches.values_of("path").unwrap())?; + + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; loader.find_all_languages(&config.parser_directories)?; @@ -251,28 +256,23 @@ fn run() -> error::Result<()> { } } else if let Some(matches) = matches.subcommand_matches("query") { let ordered_captures = 
matches.values_of("captures").is_some(); - let paths = matches - .values_of("path") - .unwrap() - .into_iter() - .map(Path::new) - .collect::>(); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; loader.find_all_languages(&config.parser_directories)?; let language = select_language( &mut loader, - paths[0], + Path::new(&paths[0]), ¤t_dir, matches.value_of("scope"), )?; let query_path = Path::new(matches.value_of("query-path").unwrap()); - let range = matches.value_of("beg>: = br.split(":").collect(); (r[0].parse().unwrap(), r[1].parse().unwrap()) }); query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?; } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; - let paths = collect_paths(matches.values_of("inputs").unwrap())?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; tags::generate_tags( &loader, matches.value_of("scope"), @@ -285,7 +285,7 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; let time = matches.is_present("time"); - let paths = collect_paths(matches.values_of("path").unwrap())?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; let html_mode = matches.is_present("html"); if html_mode { println!("{}", highlight::HTML_HEADER); @@ -358,39 +358,58 @@ fn run() -> error::Result<()> { Ok(()) } -fn collect_paths<'a>(paths: impl Iterator) -> error::Result> { - let mut result = Vec::new(); +fn collect_paths<'a>( + paths_file: Option<&str>, + paths: Option>, +) -> error::Result> { + if let Some(paths_file) = paths_file { + return Ok(fs::read_to_string(paths_file) + .map_err(Error::wrap(|| { + format!("Failed to read paths file {}", paths_file) + }))? 
+ .trim() + .split_ascii_whitespace() + .map(String::from) + .collect::>()); + } - let mut incorporate_path = |path: &str, positive| { - if positive { - result.push(path.to_string()); - } else { - if let Some(index) = result.iter().position(|p| p == path) { - result.remove(index); + if let Some(paths) = paths { + let mut result = Vec::new(); + + let mut incorporate_path = |path: &str, positive| { + if positive { + result.push(path.to_string()); + } else { + if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); + } } - } - }; + }; - for mut path in paths { - let mut positive = true; - if path.starts_with("!") { - positive = false; - path = path.trim_start_matches("!"); - } + for mut path in paths { + let mut positive = true; + if path.starts_with("!") { + positive = false; + path = path.trim_start_matches("!"); + } - if Path::new(path).exists() { - incorporate_path(path, positive); - } else { - let paths = - glob(path).map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; - for path in paths { - if let Some(path) = path?.to_str() { - incorporate_path(path, positive); + if Path::new(path).exists() { + incorporate_path(path, positive); + } else { + let paths = glob(path) + .map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; + for path in paths { + if let Some(path) = path?.to_str() { + incorporate_path(path, positive); + } } } } + + return Ok(result); } - Ok(result) + + Err(Error::new("Must provide one or more paths".to_string())) } fn select_language( diff --git a/cli/src/query.rs b/cli/src/query.rs index 8d097911..e71e6254 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -6,7 +6,7 @@ use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; pub fn query_files_at_paths( language: Language, - paths: Vec<&Path>, + paths: Vec, query_path: &Path, ordered_captures: bool, range: Option<(usize, usize)>, @@ -29,9 +29,9 @@ pub fn query_files_at_paths( parser.set_language(language).map_err(|e| 
e.to_string())?; for path in paths { - writeln!(&mut stdout, "{}", path.to_str().unwrap())?; + writeln!(&mut stdout, "{}", path)?; - let source_code = fs::read(path).map_err(Error::wrap(|| { + let source_code = fs::read(&path).map_err(Error::wrap(|| { format!("Error reading source file {:?}", path) }))?; let text_callback = |n: Node| &source_code[n.byte_range()]; From 4535efce69016d28360618f9fc13e4ad4401b545 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 09:39:06 -0700 Subject: [PATCH 41/71] query: Prevent dropping of matches when exceeding range maximum Fixes #685 --- cli/src/tests/query_test.rs | 39 +++++++++++++++++++++++++++++++++ lib/src/query.c | 43 ++++++++++++++++++++++++++----------- 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index d4f18c7d..06ecc42e 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1189,6 +1189,45 @@ fn test_query_matches_within_byte_range() { }); } +#[test] +fn test_query_captures_within_byte_range() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + " + (call_expression + function: (identifier) @function + arguments: (argument_list (string_literal) @string.arg)) + + (string_literal) @string + ", + ) + .unwrap(); + + let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(3, 27) + .captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("function", "DEFUN"), + ("string.arg", "\"safe-length\""), + ("string", "\"safe-length\""), + ] + ); + }); +} + #[test] fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { diff --git 
a/lib/src/query.c b/lib/src/query.c index ff243494..b95ba057 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -172,6 +172,7 @@ struct TSQueryCursor { TSPoint start_point; TSPoint end_point; bool ascending; + bool halted; }; static const TSQueryError PARENT_DONE = -1; @@ -1286,6 +1287,7 @@ TSQueryCursor *ts_query_cursor_new(void) { TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); *self = (TSQueryCursor) { .ascending = false, + .halted = false, .states = array_new(), .finished_states = array_new(), .capture_list_pool = capture_list_pool_new(), @@ -1319,6 +1321,7 @@ void ts_query_cursor_exec( self->next_state_id = 0; self->depth = 0; self->ascending = false; + self->halted = false; self->query = query; } @@ -1522,18 +1525,30 @@ static QueryState *ts_query__cursor_copy_state( // `finished_states` array. Multiple patterns can finish on the same node. If // there are no more matches, return `false`. static inline bool ts_query_cursor__advance(TSQueryCursor *self) { - do { + bool did_match = false; + for (;;) { + if (self->halted) { + while (self->states.size > 0) { + QueryState state = array_pop(&self->states); + capture_list_pool_release( + &self->capture_list_pool, + state.capture_list_id + ); + } + } + + if (did_match || self->halted) return did_match; + if (self->ascending) { LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); // Leave this node by stepping to its next sibling or to its parent. - bool did_move = true; if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { self->ascending = false; } else if (ts_tree_cursor_goto_parent(&self->cursor)) { self->depth--; } else { - did_move = false; + self->halted = true; } // After leaving a node, remove any states that cannot make further progress. 
@@ -1545,10 +1560,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If a state completed its pattern inside of this node, but was deferred from finishing // in order to search for longer matches, mark it as finished. if (step->depth == PATTERN_DONE_MARKER) { - if (state->start_depth > self->depth || !did_move) { + if (state->start_depth > self->depth || self->halted) { LOG(" finish pattern %u\n", state->pattern_index); state->id = self->next_state_id++; array_push(&self->finished_states, *state); + did_match = true; deleted_count++; continue; } @@ -1575,10 +1591,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } } self->states.size -= deleted_count; - - if (!did_move) { - return self->finished_states.size > 0; - } } else { // If this node is before the selected range, then avoid descending into it. TSNode node = ts_tree_cursor_current_node(&self->cursor); @@ -1596,7 +1608,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if ( self->end_byte <= ts_node_start_byte(node) || point_lte(self->end_point, ts_node_start_point(node)) - ) return false; + ) { + self->halted = true; + continue; + } // Get the properties of the current node. TSSymbol symbol = ts_node_symbol(node); @@ -1888,6 +1903,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->id = self->next_state_id++; array_push(&self->finished_states, *state); array_erase(&self->states, state - self->states.contents); + did_match = true; i--; } } @@ -1901,9 +1917,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { self->ascending = true; } } - } while (self->finished_states.size == 0); - - return true; + } } bool ts_query_cursor_next_match( @@ -2043,7 +2057,10 @@ bool ts_query_cursor_next_capture( // If there are no finished matches that are ready to be returned, then // continue finding more matches. 
- if (!ts_query_cursor__advance(self)) return false; + if ( + !ts_query_cursor__advance(self) && + self->finished_states.size == 0 + ) return false; } } From f4adf0269af810e410c40a663c561511fb8c0467 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 09:53:01 -0700 Subject: [PATCH 42/71] Propagate dynamic precedence correctly for inlined rules Fixes #683 --- cli/src/generate/prepare_grammar/process_inlines.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 9ef89d75..f83658b2 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder { last_inserted_step.associativity = removed_step.associativity; } } + if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() { + production.dynamic_precedence = p.dynamic_precedence; + } production }), ); @@ -226,7 +229,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ProductionStep::new(Symbol::terminal(14))], }, ], @@ -258,7 +261,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(14)), From c4fca5f73e194988dbb2790aa37f93fffaa284f5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 14:19:59 -0700 Subject: [PATCH 43/71] node types: Fix handling of repetitions inside of fields Fixes #676 --- cli/src/generate/node_types.rs | 239 ++++++++++++++++++++------------- 1 file changed, 149 insertions(+), 90 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 9c3bea64..6df40807 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -19,7 +19,7 @@ pub(crate) struct FieldInfo { #[derive(Clone, Debug, 
Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub child_types: Vec, + pub children: FieldInfo, pub children_without_fields: FieldInfo, pub has_multi_step_production: bool, } @@ -70,7 +70,7 @@ impl Default for FieldInfoJSON { impl Default for ChildQuantity { fn default() -> Self { - Self::zero() + Self::one() } } @@ -158,7 +158,7 @@ pub(crate) fn get_variable_info( // Each variable's summary can depend on the summaries of other hidden variables, // and variables can have mutually recursive structure. So we compute the summaries - // iteratively, in a loop that terminates only when more changes are possible. + // iteratively, in a loop that terminates only when no more changes are possible. let mut did_change = true; let mut all_initialized = false; let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; @@ -168,13 +168,14 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut variable_info = result[i].clone(); - // Within a variable, consider each production separately. For each - // production, determine which children and fields can occur, and how many - // times they can occur. - for (production_index, production) in variable.productions.iter().enumerate() { - let mut field_quantities = HashMap::new(); - let mut children_without_fields_quantity = ChildQuantity::zero(); - let mut has_uninitialized_invisible_children = false; + // Examine each of the variable's productions. The variable's child types can be + // immediately combined across all productions, but the child quantities must be + // recorded separately for each production. 
+ for production in &variable.productions { + let mut production_field_quantities = HashMap::new(); + let mut production_children_quantity = ChildQuantity::zero(); + let mut production_children_without_fields_quantity = ChildQuantity::zero(); + let mut production_has_uninitialized_invisible_children = false; if production.steps.len() > 1 { variable_info.has_multi_step_production = true; @@ -190,106 +191,92 @@ pub(crate) fn get_variable_info( ChildType::Normal(child_symbol) }; - // Record all of the types of direct children. - did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type); + let child_is_hidden = !child_type_is_visible(&child_type) + && !syntax_grammar.supertype_symbols.contains(&child_symbol); - // Record all of the field names that occur. + // Maintain the set of all child types for this variable, and the quantity of + // visible children in this production. + did_change |= sorted_vec_insert(&mut variable_info.children.types, &child_type); + if !child_is_hidden { + production_children_quantity.append(ChildQuantity::one()); + } + + // Maintain the set of child types associated with each field, and the quantity + // of children associated with each field in this production. if let Some(field_name) = &step.field_name { - // Record how many times each field occurs in this production. - field_quantities - .entry(field_name) - .or_insert(ChildQuantity::zero()) - .append(ChildQuantity::one()); - - // Record the types of children for this field. - let field_info = - variable_info.fields.entry(field_name.clone()).or_insert({ - let mut info = FieldInfo { - types: Vec::new(), - quantity: ChildQuantity::one(), - }; - - // If this field did *not* occur in an earlier production, - // then it is not required. 
- if production_index > 0 { - info.quantity.required = false; - } - info - }); + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()); did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } - // Record named children without fields. - else if child_type_is_named(&child_type) { - // Record how many named children without fields occur in this production. - children_without_fields_quantity.append(ChildQuantity::one()); - // Record the types of all of the named children without fields. - let children_info = &mut variable_info.children_without_fields; - if children_info.types.is_empty() { - children_info.quantity = ChildQuantity::one(); + let production_field_quantity = production_field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()); + + // Inherit the types and quantities of hidden children associated with fields. + if child_is_hidden { + let child_variable_info = &result[child_symbol.index]; + for child_type in &child_variable_info.children.types { + did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + } + production_field_quantity.append(child_variable_info.children.quantity); + } else { + production_field_quantity.append(ChildQuantity::one()); } - did_change |= sorted_vec_insert(&mut children_info.types, &child_type); + } + // Maintain the set of named children without fields within this variable. + else if child_type_is_named(&child_type) { + production_children_without_fields_quantity.append(ChildQuantity::one()); + did_change |= sorted_vec_insert( + &mut variable_info.children_without_fields.types, + &child_type, + ); } - // Inherit information from any hidden children. - if child_symbol.is_non_terminal() - && !syntax_grammar.supertype_symbols.contains(&child_symbol) - && step.alias.is_none() - && !child_type_is_visible(&child_type) - { + // Inherit all child information from hidden children. 
+ if child_is_hidden && child_symbol.is_non_terminal() { let child_variable_info = &result[child_symbol.index]; - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. + // If a hidden child can have multiple children, then its parent node can + // appear to have multiple children. if child_variable_info.has_multi_step_production { variable_info.has_multi_step_production = true; } - // Inherit fields from this hidden child + // If a hidden child has fields, then the parent node can appear to have + // those same fields. for (field_name, child_field_info) in &child_variable_info.fields { - field_quantities + production_field_quantities .entry(field_name) .or_insert(ChildQuantity::zero()) .append(child_field_info.quantity); let field_info = variable_info .fields .entry(field_name.clone()) - .or_insert(FieldInfo { - types: Vec::new(), - quantity: ChildQuantity::one(), - }); + .or_insert(FieldInfo::default()); for child_type in &child_field_info.types { - sorted_vec_insert(&mut field_info.types, &child_type); - } - } - - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - did_change |= - sorted_vec_insert(&mut variable_info.child_types, child_type); - } - - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { did_change |= sorted_vec_insert(&mut field_info.types, &child_type); } } - // Inherit info about children without fields from this hidden child. - else { + + // If a hidden child has children, then the parent node can appear to have + // those same children. 
+ production_children_quantity.append(child_variable_info.children.quantity); + for child_type in &child_variable_info.children.types { + did_change |= + sorted_vec_insert(&mut variable_info.children.types, child_type); + } + + // If a hidden child can have named children without fields, then the parent + // node can appear to have those same children. + if step.field_name.is_none() { let grandchildren_info = &child_variable_info.children_without_fields; if !grandchildren_info.types.is_empty() { - children_without_fields_quantity - .append(grandchildren_info.quantity); - - if variable_info.children_without_fields.types.is_empty() { - variable_info.children_without_fields.quantity = - ChildQuantity::one(); - } - - for child_type in &grandchildren_info.types { + production_children_without_fields_quantity + .append(child_variable_info.children_without_fields.quantity); + for child_type in &child_variable_info.children_without_fields.types + { did_change |= sorted_vec_insert( &mut variable_info.children_without_fields.types, &child_type, @@ -302,22 +289,27 @@ pub(crate) fn get_variable_info( // Note whether or not this production contains children whose summaries // have not yet been computed. if child_symbol.index >= i && !all_initialized { - has_uninitialized_invisible_children = true; + production_has_uninitialized_invisible_children = true; } } // If this production's children all have had their summaries initialized, // then expand the quantity information with all of the possibilities introduced // by this production. 
- if !has_uninitialized_invisible_children { + if !production_has_uninitialized_invisible_children { + did_change |= variable_info + .children + .quantity + .union(production_children_quantity); + did_change |= variable_info .children_without_fields .quantity - .union(children_without_fields_quantity); + .union(production_children_without_fields_quantity); for (field_name, info) in variable_info.fields.iter_mut() { did_change |= info.quantity.union( - field_quantities + production_field_quantities .get(field_name) .cloned() .unwrap_or(ChildQuantity::zero()), @@ -352,7 +344,8 @@ pub(crate) fn get_variable_info( // Update all of the node type lists to eliminate hidden nodes. for supertype_symbol in &syntax_grammar.supertype_symbols { result[supertype_symbol.index] - .child_types + .children + .types .retain(child_type_is_visible); } for variable_info in result.iter_mut() { @@ -467,7 +460,8 @@ pub(crate) fn generate_node_types_json( subtypes: None, }); let mut subtypes = info - .child_types + .children + .types .iter() .map(child_type_to_node_type) .collect::>(); @@ -1461,6 +1455,71 @@ mod tests { ); } + #[test] + fn test_get_variable_info_with_repetitions_inside_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Field associated with a repetition. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1")], + }, + Production { + dynamic_precedence: 0, + steps: vec![], + }, + ], + }, + // Repetition node + SyntaxVariable { + name: "_rule0_repeat".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(1)), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + &AliasMap::new(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( From 12341dbbc03075e0b3bdcbf05191efbac78731fe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 14:23:54 -0700 Subject: [PATCH 44/71] 0.16.9 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cdad3b61..117ac49e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.16.8" +version = "0.16.9" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 0d85952f..52a2ed6b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = 
"tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.8" +version = "0.16.9" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 738c5622..01afe107 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.8", + "version": "0.16.9", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 82aa1462fd9f4b0d3a27dc2241318d6dbd0f6830 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 15:12:13 -0700 Subject: [PATCH 45/71] Clean up get_variable_info function --- cli/src/generate/node_types.rs | 67 ++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 6df40807..039d7190 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -196,7 +196,8 @@ pub(crate) fn get_variable_info( // Maintain the set of all child types for this variable, and the quantity of // visible children in this production. - did_change |= sorted_vec_insert(&mut variable_info.children.types, &child_type); + did_change |= + extend_sorted(&mut variable_info.children.types, Some(&child_type)); if !child_is_hidden { production_children_quantity.append(ChildQuantity::one()); } @@ -208,7 +209,7 @@ pub(crate) fn get_variable_info( .fields .entry(field_name.clone()) .or_insert(FieldInfo::default()); - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); let production_field_quantity = production_field_quantities .entry(field_name) @@ -217,9 +218,10 @@ pub(crate) fn get_variable_info( // Inherit the types and quantities of hidden children associated with fields. 
if child_is_hidden { let child_variable_info = &result[child_symbol.index]; - for child_type in &child_variable_info.children.types { - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } + did_change |= extend_sorted( + &mut field_info.types, + &child_variable_info.children.types, + ); production_field_quantity.append(child_variable_info.children.quantity); } else { production_field_quantity.append(ChildQuantity::one()); @@ -228,9 +230,9 @@ pub(crate) fn get_variable_info( // Maintain the set of named children without fields within this variable. else if child_type_is_named(&child_type) { production_children_without_fields_quantity.append(ChildQuantity::one()); - did_change |= sorted_vec_insert( + did_change |= extend_sorted( &mut variable_info.children_without_fields.types, - &child_type, + Some(&child_type), ); } @@ -251,22 +253,23 @@ pub(crate) fn get_variable_info( .entry(field_name) .or_insert(ChildQuantity::zero()) .append(child_field_info.quantity); - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert(FieldInfo::default()); - for child_type in &child_field_info.types { - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } + did_change |= extend_sorted( + &mut variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()) + .types, + &child_field_info.types, + ); } // If a hidden child has children, then the parent node can appear to have // those same children. production_children_quantity.append(child_variable_info.children.quantity); - for child_type in &child_variable_info.children.types { - did_change |= - sorted_vec_insert(&mut variable_info.children.types, child_type); - } + did_change |= extend_sorted( + &mut variable_info.children.types, + &child_variable_info.children.types, + ); // If a hidden child can have named children without fields, then the parent // node can appear to have those same children. 
@@ -275,13 +278,10 @@ pub(crate) fn get_variable_info( if !grandchildren_info.types.is_empty() { production_children_without_fields_quantity .append(child_variable_info.children_without_fields.quantity); - for child_type in &child_variable_info.children_without_fields.types - { - did_change |= sorted_vec_insert( - &mut variable_info.children_without_fields.types, - &child_type, - ); - } + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + &child_variable_info.children_without_fields.types, + ); } } } @@ -680,16 +680,19 @@ fn variable_type_for_child_type( } } -fn sorted_vec_insert(vec: &mut Vec, value: &T) -> bool +fn extend_sorted<'a, T>(vec: &mut Vec, values: impl IntoIterator) -> bool where T: Clone + Eq + Ord, + T: 'a, { - if let Err(i) = vec.binary_search(&value) { - vec.insert(i, value.clone()); - true - } else { - false - } + values.into_iter().any(|value| { + if let Err(i) = vec.binary_search(&value) { + vec.insert(i, value.clone()); + true + } else { + false + } + }) } #[cfg(test)] From 740d864e678ab0c5518780afd906e2123d8a9d79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tu=E1=BA=A5n-Anh=20Nguy=E1=BB=85n?= Date: Sun, 19 Jul 2020 12:40:17 +0700 Subject: [PATCH 46/71] Add '.' as a valid start of a predicate, in addition to '#' See https://github.com/ubolonton/emacs-tree-sitter/issues/38 --- cli/src/tests/query_test.rs | 29 +++++++++++++++++++++++++++++ lib/src/query.c | 4 ++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 06ecc42e..493bea8a 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2087,6 +2087,35 @@ fn test_query_disable_pattern() { }); } +#[test] +fn test_query_alternative_predicate_prefix() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new(language, r#" + ((call_expression + function: (identifier) @keyword + arguments: (argument_list + (string_literal) @function)) + (.eq? 
@keyword "DEFUN")) + "#).unwrap(); + let source = r#" + DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, + doc: /* Return the argument unchanged. */ + attributes: const) + (Lisp_Object arg) + { + return arg; + } + "#; + assert_query_matches( + language, + &query, + source, + &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], + ); + }); +} + fn assert_query_matches( language: Language, query: &Query, diff --git a/lib/src/query.c b/lib/src/query.c index b95ba057..acce2c72 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -805,8 +805,8 @@ static TSQueryError ts_query__parse_pattern( } } - // A pound character indicates the start of a predicate. - else if (stream->next == '#') { + // A dot/pound character indicates the start of a predicate. + else if (stream->next == '.' || stream->next == '#') { stream_advance(stream); return ts_query__parse_predicate(self, stream); } From ba70927f573b0d098046da77888d3219ee31cc9d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 20 Jul 2020 16:46:45 -0700 Subject: [PATCH 47/71] tags: Skip tags with a parse error inside the name --- tags/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index dcbb9984..7733f3e3 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -372,6 +372,10 @@ where } if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { + if name_node.has_error() { + continue; + } + let name_range = name_node.byte_range(); if pattern_info.name_must_be_non_local { From a3b440b0c89763bb0b2e49f2a94144accc13462b Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Thu, 23 Jul 2020 09:48:18 +0200 Subject: [PATCH 48/71] size_t variables need %zu, not %lu --- lib/src/alloc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 9bbf7513..0e0927a9 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -45,7 +45,7 @@ static inline bool ts_toggle_allocation_recording(bool value) { static inline 
void *ts_malloc(size_t size) { void *result = malloc(size); if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %lu bytes", size); + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); exit(1); } return result; @@ -54,7 +54,7 @@ static inline void *ts_malloc(size_t size) { static inline void *ts_calloc(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %lu bytes", count * size); + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); exit(1); } return result; @@ -63,7 +63,7 @@ static inline void *ts_calloc(size_t count, size_t size) { static inline void *ts_realloc(void *buffer, size_t size) { void *result = realloc(buffer, size); if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to reallocate %lu bytes", size); + fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); exit(1); } return result; From de2b71d465919cc361d45a4abecb867b12fdd6d4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 23 Jul 2020 16:05:50 -0700 Subject: [PATCH 49/71] Fix query bug when max permutations are exceeded --- cli/src/loader.rs | 4 +- cli/src/tests/query_test.rs | 45 +++++++++++- lib/src/query.c | 138 +++++++++++++++++++++--------------- 3 files changed, 125 insertions(+), 62 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index cf2eb143..62cc9b62 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -160,7 +160,9 @@ impl Loader { // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. 
else { - let file_contents = fs::read_to_string(path)?; + let file_contents = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?; + let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; for configuration_id in configuration_ids { diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 493bea8a..c304f3b4 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1135,6 +1135,43 @@ fn test_query_matches_with_too_many_permutations_to_track() { }); } +#[test] +fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ( + (comment) @doc + ; not immediate + (class_declaration) @class + ) + + (call_expression + function: [ + (identifier) @function + (member_expression property: (property_identifier) @method) + ]) + ", + ) + .unwrap(); + + let source = "/* hi */ a.b(); ".repeat(50); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); + + assert_eq!( + collect_matches(matches, &query, source.as_str()), + vec![(1, vec![("method", "b")]); 50], + ); + }); +} + #[test] fn test_query_matches_with_anonymous_tokens() { allocations::record(|| { @@ -2091,13 +2128,17 @@ fn test_query_disable_pattern() { fn test_query_alternative_predicate_prefix() { allocations::record(|| { let language = get_language("c"); - let query = Query::new(language, r#" + let query = Query::new( + language, + r#" ((call_expression function: (identifier) @keyword arguments: (argument_list (string_literal) @function)) (.eq? 
@keyword "DEFUN")) - "#).unwrap(); + "#, + ) + .unwrap(); let source = r#" DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, doc: /* Return the argument unchanged. */ diff --git a/lib/src/query.c b/lib/src/query.c index acce2c72..05c767e1 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -122,6 +122,7 @@ typedef struct { uint16_t consumed_capture_count: 14; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; + bool dead: 1; } QueryState; typedef Array(TSQueryCapture) CaptureList; @@ -1365,6 +1366,7 @@ static bool ts_query_cursor__first_in_progress_capture( *pattern_index = UINT32_MAX; for (unsigned i = 0; i < self->states.size; i++) { const QueryState *state = &self->states.contents[i]; + if (state->dead) continue; const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id @@ -1480,44 +1482,88 @@ static bool ts_query_cursor__add_state( .start_depth = self->depth - step->depth, .consumed_capture_count = 0, .seeking_immediate_match = false, + .has_in_progress_alternatives = false, + .dead = false, })); return true; } +// Acquire a capture list for this state. If there are no capture lists left in the +// pool, this will steal the capture list from another existing state, and mark that +// other state as 'dead'. +static CaptureList *ts_query_cursor__prepare_to_capture( + TSQueryCursor *self, + QueryState *state, + unsigned state_index_to_preserve +) { + if (state->capture_list_id == NONE) { + state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + + // If there are no capture lists left in the pool, then terminate whichever + // state has captured the earliest node in the document, and steal its + // capture list. 
+ if (state->capture_list_id == NONE) { + uint32_t state_index, byte_offset, pattern_index; + if ( + ts_query_cursor__first_in_progress_capture( + self, + &state_index, + &byte_offset, + &pattern_index + ) && + state_index != state_index_to_preserve + ) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + state_index, pattern_index, byte_offset + ); + QueryState *other_state = &self->states.contents[state_index]; + state->capture_list_id = other_state->capture_list_id; + other_state->capture_list_id = NONE; + other_state->dead = true; + CaptureList *list = capture_list_pool_get_mut( + &self->capture_list_pool, + state->capture_list_id + ); + array_clear(list); + return list; + } else { + LOG(" ran out of capture lists"); + return NULL; + } + } + } + return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); +} + // Duplicate the given state and insert the newly-created state immediately after // the given state in the `states` array. -static QueryState *ts_query__cursor_copy_state( +static QueryState *ts_query_cursor__copy_state( TSQueryCursor *self, - const QueryState *state + unsigned state_index ) { if (self->states.size >= MAX_STATE_COUNT) { LOG(" too many states"); return NULL; } - // If the state has captures, copy its capture list. + const QueryState *state = &self->states.contents[state_index]; QueryState copy = *state; - copy.capture_list_id = state->capture_list_id; + copy.capture_list_id = NONE; + + // If the state has captures, copy its capture list. 
if (state->capture_list_id != NONE) { - copy.capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - if (copy.capture_list_id == NONE) { - LOG(" too many capture lists"); - return NULL; - } + CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); + if (!new_captures) return NULL; const CaptureList *old_captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); - CaptureList *new_captures = capture_list_pool_get_mut( - &self->capture_list_pool, - copy.capture_list_id - ); array_push_all(new_captures, old_captures); } - uint32_t index = (state - self->states.contents) + 1; - array_insert(&self->states, index, copy); - return &self->states.contents[index]; + array_insert(&self->states, state_index + 1, copy); + return &self->states.contents[state_index + 1]; } // Walk the tree, processing patterns until at least one pattern finishes, @@ -1728,7 +1774,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { !step->is_pattern_start && step->contains_captures ) { - if (ts_query__cursor_copy_state(self, state)) { + if (ts_query_cursor__copy_state(self, i)) { LOG( " split state for capture. pattern:%u, step:%u\n", state->pattern_index, @@ -1739,45 +1785,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } // If the current node is captured in this pattern, add it to the capture list. - // For the first capture in a pattern, lazily acquire a capture list. if (step->capture_ids[0] != NONE) { - if (state->capture_list_id == NONE) { - state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - - // If there are no capture lists left in the pool, then terminate whichever - // state has captured the earliest node in the document, and steal its - // capture list. 
- if (state->capture_list_id == NONE) { - uint32_t state_index, byte_offset, pattern_index; - if (ts_query_cursor__first_in_progress_capture( - self, - &state_index, - &byte_offset, - &pattern_index - )) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - state_index, pattern_index, byte_offset - ); - state->capture_list_id = self->states.contents[state_index].capture_list_id; - array_erase(&self->states, state_index); - if (state_index < i) { - i--; - state--; - } - } else { - LOG(" too many finished states.\n"); - array_erase(&self->states, i); - i--; - continue; - } - } + CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); + if (!capture_list) { + array_erase(&self->states, i); + i--; + continue; } - CaptureList *capture_list = capture_list_pool_get_mut( - &self->capture_list_pool, - state->capture_list_id - ); for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { uint16_t capture_id = step->capture_ids[j]; if (step->capture_ids[j] == NONE) break; @@ -1800,10 +1815,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->step_index ); - // If this state's next step has an 'alternative' step (the step is either optional, - // or is the end of a repetition), then copy the state in order to pursue both - // alternatives. The alternative step itself may have an alternative, so this is - // an interative process. + // If this state's next step has an alternative step, then copy the state in order + // to pursue both alternatives. The alternative step itself may have an alternative, + // so this is an interative process. 
unsigned end_index = i + 1; for (unsigned j = i; j < end_index; j++) { QueryState *state = &self->states.contents[j]; @@ -1815,7 +1829,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query__cursor_copy_state(self, state); + QueryState *copy = ts_query_cursor__copy_state(self, j); if (next_step->is_pass_through) { state->step_index++; j--; @@ -1841,14 +1855,20 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i]; - bool did_remove = false; + if (state->dead) { + array_erase(&self->states, i); + i--; + continue; + } // Enfore the longest-match criteria. When a query pattern contains optional or // repeated nodes, this is necesssary to avoid multiple redundant states, where // one state has a strict subset of another state's captures. + bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; if ( + !other_state->dead && state->pattern_index == other_state->pattern_index && state->start_depth == other_state->start_depth ) { From 32099050d6d41ff9538c4f7c4991b66254cad024 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 24 Jul 2020 09:26:54 -0700 Subject: [PATCH 50/71] node_types: Fix panic when field is associated with a hidden token Fixes #695 --- cli/src/generate/node_types.rs | 35 +++++++++++++++++++++++++++++++++- cli/src/main.rs | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 039d7190..7a5768a5 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -216,7 +216,7 @@ pub(crate) fn get_variable_info( .or_insert(ChildQuantity::zero()); // Inherit the types and quantities of hidden children associated with fields. 
- if child_is_hidden { + if child_is_hidden && child_symbol.is_non_terminal() { let child_variable_info = &result[child_symbol.index]; did_change |= extend_sorted( &mut field_info.types, @@ -352,6 +352,7 @@ pub(crate) fn get_variable_info( for (_, field_info) in variable_info.fields.iter_mut() { field_info.types.retain(child_type_is_visible); } + variable_info.fields.retain(|_, v| !v.types.is_empty()); variable_info .children_without_fields .types @@ -1174,6 +1175,38 @@ mod tests { ); } + #[test] + fn test_node_types_with_fields_on_hidden_tokens() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![Variable { + name: "script".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("a".to_string(), Rule::pattern("hi")), + Rule::field("b".to_string(), Rule::pattern("bye")), + ]), + }], + }); + + assert_eq!( + node_types, + [NodeInfoJSON { + kind: "script".to_string(), + named: true, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None + }] + ); + } + #[test] fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { diff --git a/cli/src/main.rs b/cli/src/main.rs index 0668d08d..2f8c6dd5 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -99,7 +99,7 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("tags") .arg(Arg::with_name("quiet").long("quiet").short("q")) - .arg(Arg::with_name("time").long("quiet").short("t")) + .arg(Arg::with_name("time").long("time").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( From 1ae5cbc851ca55214a59e675240cd2dfd1efb276 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 24 Jul 2020 10:49:20 -0700 Subject: [PATCH 
51/71] query: Handle #not-match? in rust, wasm bindings --- cli/src/tests/query_test.rs | 9 ++++++++- lib/binding_rust/lib.rs | 10 ++++++---- lib/binding_web/binding.js | 4 +++- lib/binding_web/test/query-test.js | 9 ++++++++- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c304f3b4..914d41cd 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1470,12 +1470,17 @@ fn test_query_captures_with_text_conditions() { ((identifier) @function.builtin (#eq? @function.builtin "require")) - (identifier) @variable + ((identifier) @variable + (#not-match? @variable "^(lambda|load)$")) "#, ) .unwrap(); let source = " + toad + load + panda + lambda const ab = require('./ab'); new Cd(EF); "; @@ -1489,6 +1494,8 @@ fn test_query_captures_with_text_conditions() { assert_eq!( collect_captures(captures, &query, source), &[ + ("variable", "toad"), + ("variable", "panda"), ("variable", "ab"), ("function.builtin", "require"), ("variable", "require"), diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index c0aba32f..ec7cd791 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -169,7 +169,7 @@ pub enum QueryError { enum TextPredicate { CaptureEqString(u32, String, bool), CaptureEqCapture(u32, u32, bool), - CaptureMatchString(u32, regex::bytes::Regex), + CaptureMatchString(u32, regex::bytes::Regex, bool), } impl Language { @@ -1298,7 +1298,7 @@ impl Query { }); } - "match?" => { + "match?" | "not-match?" => { if p.len() != 3 { return Err(QueryError::Predicate(format!( "Wrong number of arguments to #match? predicate. 
Expected 2, got {}.", @@ -1318,12 +1318,14 @@ impl Query { ))); } + let is_positive = operator_name == "match?"; let regex = &string_values[p[2].value_id as usize]; text_predicates.push(TextPredicate::CaptureMatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { QueryError::Predicate(format!("Invalid regex '{}'", regex)) })?, + is_positive, )); } @@ -1607,9 +1609,9 @@ impl<'a> QueryMatch<'a> { let node = self.capture_for_index(*i).unwrap(); (text_callback(node).as_ref() == s.as_bytes()) == *is_positive } - TextPredicate::CaptureMatchString(i, r) => { + TextPredicate::CaptureMatchString(i, r, is_positive) => { let node = self.capture_for_index(*i).unwrap(); - r.is_match(text_callback(node).as_ref()) + r.is_match(text_callback(node).as_ref()) == *is_positive } }) } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 567b7eb3..3a193ef9 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -784,6 +784,8 @@ class Language { } break; + case 'not-match?': + isPositive = false; case 'match?': if (steps.length !== 3) throw new Error( `Wrong number of arguments to \`#match?\` predicate. 
Expected 2, got ${steps.length - 1}.` @@ -798,7 +800,7 @@ class Language { const regex = new RegExp(steps[2].value); textPredicates[i].push(function(captures) { for (const c of captures) { - if (c.name === captureName) return regex.test(c.node.text); + if (c.name === captureName) return regex.test(c.node.text) === isPositive; } return false; }); diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index 9dda9834..9d1e24e1 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -126,12 +126,17 @@ describe("Query", () => { it("handles conditions that compare the text of capture to literal strings", () => { tree = parser.parse(` + lambda + panda + load + toad const ab = require('./ab'); new Cd(EF); `); query = JavaScript.query(` - (identifier) @variable + ((identifier) @variable + (#not-match? @variable "^(lambda|load)$")) ((identifier) @function.builtin (#eq? @function.builtin "require")) @@ -145,6 +150,8 @@ describe("Query", () => { const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ + { name: "variable", text: "panda" }, + { name: "variable", text: "toad" }, { name: "variable", text: "ab" }, { name: "variable", text: "require" }, { name: "function.builtin", text: "require" }, From d22240591c2accdc94de466f7352ee56c399a796 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 27 Jul 2020 17:38:32 -0500 Subject: [PATCH 52/71] Docs: document the `set!` predicate I was looking for something like this, I searched the documentation, but I found it in https://github.com/tree-sitter/tree-sitter-javascript/blob/master/queries/injections.scm#L15 --- docs/section-4-syntax-highlighting.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index 0182704b..cbf97b66 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -385,6 +385,14 @@ The 
following query would specify that the contents of the heredoc should be par (heredoc_end) @injection.language) @injection.content ``` +You can also force the language using the `#set!` predicate. +For example, this will force the language to be always `ruby`. + +``` +((heredoc_body) @injection.content + (#set! injection.language "ruby")) +``` + ## Unit Testing Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system](https://www.sublimetext.com/docs/3/syntax.html#testing) for testing highlighting. From 253f23c3d432d75cbb2b4c53f5ca090c1e46ae72 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 28 Jul 2020 13:30:34 -0700 Subject: [PATCH 53/71] Fix error when parse error occurs after non-terminal extra --- lib/src/parser.c | 53 +++++++++++++--------- test/fixtures/error_corpus/ruby_errors.txt | 19 ++++++++ 2 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 test/fixtures/error_corpus/ruby_errors.txt diff --git a/lib/src/parser.c b/lib/src/parser.c index 4d7dc1e5..035672b8 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -355,10 +355,11 @@ static Subtree ts_parser__lex( StackVersion version, TSStateId parse_state ) { - Length start_position = ts_stack_position(self->stack, version); - Subtree external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE; + + Length start_position = ts_stack_position(self->stack, version); + Subtree external_token = ts_stack_last_external_token(self->stack, version); const bool *valid_external_tokens = ts_language_enabled_external_tokens( self->language, lex_mode.external_lex_state @@ -1345,24 +1346,26 @@ static bool ts_parser__advance( ); } -lex: - // Otherwise, re-run the lexer. 
- if (!lookahead.ptr) { - lookahead = ts_parser__lex(self, version, state); - if (lookahead.ptr) { - ts_parser__set_cached_token(self, position, last_external_token, lookahead); - ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); - } - - // When parsing a non-terminal extra, a null lookahead indicates the - // end of the rule. The reduction is stored in the EOF table entry. - // After the reduction, the lexer needs to be run again. - else { - ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); - } - } - + bool needs_lex = !lookahead.ptr; for (;;) { + // Otherwise, re-run the lexer. + if (needs_lex) { + needs_lex = false; + lookahead = ts_parser__lex(self, version, state); + + if (lookahead.ptr) { + ts_parser__set_cached_token(self, position, last_external_token, lookahead); + ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); + } + + // When parsing a non-terminal extra, a null lookahead indicates the + // end of the rule. The reduction is stored in the EOF table entry. + // After the reduction, the lexer needs to be run again. + else { + ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); + } + } + // If a cancellation flag or a timeout was provided, then check every // time a fixed number of parse actions has been processed. if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { @@ -1459,8 +1462,10 @@ lex: // (and completing the non-terminal extra rule) run the lexer again based // on the current parse state. if (!lookahead.ptr) { - lookahead = ts_parser__lex(self, version, state); + needs_lex = true; + continue; } + ts_language_table_entry( self->language, state, @@ -1470,6 +1475,11 @@ lex: continue; } + if (!lookahead.ptr) { + ts_stack_pause(self->stack, version, ts_builtin_sym_end); + return true; + } + // If there were no parse actions for the current lookahead token, then // it is not valid in this state. 
If the current lookahead token is a // keyword, then switch to treating it as the normal word token if that @@ -1509,8 +1519,7 @@ lex: if (ts_parser__breakdown_top_of_stack(self, version)) { state = ts_stack_state(self->stack, version); ts_subtree_release(&self->tree_pool, lookahead); - lookahead = NULL_SUBTREE; - goto lex; + needs_lex = true; continue; } diff --git a/test/fixtures/error_corpus/ruby_errors.txt b/test/fixtures/error_corpus/ruby_errors.txt new file mode 100644 index 00000000..9c35781c --- /dev/null +++ b/test/fixtures/error_corpus/ruby_errors.txt @@ -0,0 +1,19 @@ +========================== +Heredocs with errors +========================== + +joins(<<~SQL( + b +SQL +c + +--- + +(program + (method_call + method: (identifier) + (ERROR (heredoc_beginning)) + arguments: (argument_list + (heredoc_body (heredoc_end)) + (identifier) + (MISSING ")")))) From 81bbdf19f4dc42f5f30c589b3ed449b6150de3de Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 09:50:13 -0700 Subject: [PATCH 54/71] Fix handling of non-terminal extras that share non-extra rules Fixes #701 --- .../generate/build_tables/minimize_parse_table.rs | 3 +++ lib/src/parser.c | 15 ++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 5d8f7f0f..aa4801c8 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -199,6 +199,9 @@ impl<'a> Minimizer<'a> { right_state: &ParseState, group_ids_by_state_id: &Vec, ) -> bool { + if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra { + return true; + } for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { if self.entries_conflict( diff --git a/lib/src/parser.c b/lib/src/parser.c index 035672b8..37d1a1c2 100644 --- a/lib/src/parser.c +++ 
b/lib/src/parser.c @@ -356,7 +356,10 @@ static Subtree ts_parser__lex( TSStateId parse_state ) { TSLexMode lex_mode = self->language->lex_modes[parse_state]; - if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE; + if (lex_mode.lex_state == (uint16_t)-1) { + LOG("no_lookahead_after_non_terminal_extra"); + return NULL_SUBTREE; + } Length start_position = ts_stack_position(self->stack, version); Subtree external_token = ts_stack_last_external_token(self->stack, version); @@ -762,7 +765,7 @@ static StackVersion ts_parser__reduce( int dynamic_precedence, uint16_t production_id, bool is_fragile, - bool is_extra + bool end_of_non_terminal_extra ) { uint32_t initial_version_count = ts_stack_version_count(self->stack); @@ -833,7 +836,9 @@ static StackVersion ts_parser__reduce( TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); - if (is_extra) parent.ptr->extra = true; + if (end_of_non_terminal_extra && next_state == state) { + parent.ptr->extra = true; + } if (is_fragile || pop.size > 1 || initial_version_count > 1) { parent.ptr->fragile_left = true; parent.ptr->fragile_right = true; @@ -1417,12 +1422,12 @@ static bool ts_parser__advance( case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; - bool is_extra = lookahead.ptr == NULL; + bool end_of_non_terminal_extra = lookahead.ptr == NULL; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.reduce.symbol, action.params.reduce.child_count, action.params.reduce.dynamic_precedence, action.params.reduce.production_id, - is_fragile, is_extra + is_fragile, end_of_non_terminal_extra ); if (reduction_version != STACK_VERSION_NONE) { last_reduction_version = reduction_version; From 9a7fdd29c263a1fa7778c7ec1cbc812397d88571 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: 
Wed, 29 Jul 2020 09:53:07 -0700 Subject: [PATCH 55/71] Add test for non-terminal extras that share non-extra rules --- .../corpus.txt | 23 +++++++ .../grammar.json | 68 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt create mode 100644 test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json diff --git a/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt new file mode 100644 index 00000000..a22d8b8d --- /dev/null +++ b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt @@ -0,0 +1,23 @@ +===== +Extras +===== + +; +%; +%foo:; +; +bar: baz:; +; + +--- + +(program + (statement) + (macro_statement (statement)) + (macro_statement (statement + (label_declaration (identifier)))) + (statement) + (statement + (label_declaration (identifier)) + (label_declaration (identifier))) + (statement)) diff --git a/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json new file mode 100644 index 00000000..a7f51b8e --- /dev/null +++ b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json @@ -0,0 +1,68 @@ +{ + "name": "extra_non_terminals_with_shared_rules", + + "extras": [ + { "type": "PATTERN", "value": "\\s+" }, + { "type": "SYMBOL", "name": "macro_statement" } + ], + + "rules": { + "program": { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "statement" + } + }, + "statement": { + "type": "SEQ", + "members": [ + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "label_declaration" + } + }, + { + "type": "STRING", + "value": ";" + } + ] + }, + "macro_statement": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "%" + }, + { + "type": "SYMBOL", + "name": 
"statement" + } + ] + }, + "label_declaration": { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "identifier" + }, + { + "type": "STRING", + "value": ":" + } + ] + }, + "identifier": { + "type": "PATTERN", + "value": "[a-zA-Z]+" + } + }, + "conflicts": [], + "externals": [], + "inline": [], + "supertypes": [] +} From 4ec7d8096853b1b478da3588206eb2a29559efa9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 10:04:05 -0700 Subject: [PATCH 56/71] Mention rule order as the fallback criteria in docs Fixes #702 --- docs/section-3-creating-parsers.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index b075e488..694f8dae 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -505,6 +505,8 @@ Grammars often contain multiple tokens that can match the same characters. For e 4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. +5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. + ### Keywords Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. 
For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this: From e89a19a1588382c24ca807c7e43520efe60e311a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 15:30:13 -0700 Subject: [PATCH 57/71] tags: Add @ignore capture --- cli/src/tests/tags_test.rs | 8 +- tags/src/lib.rs | 254 +++++++++++++++++++++---------------- 2 files changed, 153 insertions(+), 109 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index f00e83ac..3ff1c92b 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -68,11 +68,13 @@ const JS_TAG_QUERY: &'static str = r#" const RUBY_TAG_QUERY: &'static str = r#" (method - name: (identifier) @name) @definition.method + name: (_) @name) @definition.method (method_call method: (identifier) @name) @reference.call +(setter (identifier) @ignore) + ((identifier) @name @reference.call (#is-not? local)) "#; @@ -207,7 +209,7 @@ fn test_tags_ruby() { " b = 1 - def foo() + def foo=() c = 1 # a is a method because it is not in scope @@ -239,7 +241,7 @@ fn test_tags_ruby() { )) .collect::>(), &[ - ("foo", "method", (2, 4)), + ("foo=", "method", (2, 4)), ("bar", "call", (7, 4)), ("a", "call", (7, 8)), ("b", "call", (7, 11)), diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 7733f3e3..07209e4d 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -25,6 +25,7 @@ pub struct TagsConfiguration { capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, + ignore_capture_index: Option, local_scope_capture_index: Option, local_definition_capture_index: Option, tags_pattern_index: usize, @@ -128,12 +129,14 @@ impl TagsConfiguration { let mut syntax_type_names = Vec::new(); let mut doc_capture_index = None; let mut name_capture_index = None; + let mut ignore_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { match 
name.as_str() { "" => continue, "name" => name_capture_index = Some(i as u32), + "ignore" => ignore_capture_index = Some(i as u32), "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), @@ -222,6 +225,7 @@ impl TagsConfiguration { capture_map, doc_capture_index, name_capture_index, + ignore_capture_index, tags_pattern_index, local_scope_capture_index, local_definition_capture_index, @@ -311,7 +315,12 @@ where if self.tag_queue.len() > 1 && self.tag_queue[0].0.name_range.end < last_entry.0.name_range.start { - return Some(Ok(self.tag_queue.remove(0).0)); + let tag = self.tag_queue.remove(0).0; + if tag.is_ignored() { + continue; + } else { + return Some(Ok(tag)); + } } } @@ -350,10 +359,16 @@ where let mut syntax_type_id = 0; let mut is_definition = false; let mut docs_adjacent_node = None; + let mut is_ignored = false; for capture in mat.captures { let index = Some(capture.index); + if index == self.config.ignore_capture_index { + is_ignored = true; + name_node = Some(capture.node); + } + if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture { docs_adjacent_node = Some(capture.node); } @@ -371,129 +386,137 @@ where } } - if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { - if name_node.has_error() { - continue; - } - + if let Some(name_node) = name_node { let name_range = name_node.byte_range(); - if pattern_info.name_must_be_non_local { - let mut is_local = false; - for scope in self.scopes.iter().rev() { - if scope.range.start <= name_range.start - && scope.range.end >= name_range.end - { - if scope - .local_defs - .iter() - .any(|d| d.name == &self.source[name_range.clone()]) - { - is_local = true; - break; - } - if !scope.inherits { - break; - } - } - } - if is_local { + let tag; + if let Some(tag_node) = tag_node { + if name_node.has_error() { continue; } - } - // If needed, filter the doc nodes 
based on their ranges, selecting - // only the slice that are adjacent to some specified node. - let mut docs_start_index = 0; - if let (Some(docs_adjacent_node), false) = - (docs_adjacent_node, doc_nodes.is_empty()) - { - docs_start_index = doc_nodes.len(); - let mut start_row = docs_adjacent_node.start_position().row; - while docs_start_index > 0 { - let doc_node = &doc_nodes[docs_start_index - 1]; - let prev_doc_end_row = doc_node.end_position().row; - if prev_doc_end_row + 1 >= start_row { - docs_start_index -= 1; - start_row = doc_node.start_position().row; - } else { - break; + if pattern_info.name_must_be_non_local { + let mut is_local = false; + for scope in self.scopes.iter().rev() { + if scope.range.start <= name_range.start + && scope.range.end >= name_range.end + { + if scope + .local_defs + .iter() + .any(|d| d.name == &self.source[name_range.clone()]) + { + is_local = true; + break; + } + if !scope.inherits { + break; + } + } + } + if is_local { + continue; } } - } - // Generate a doc string from all of the doc nodes, applying any strip regexes. - let mut docs = None; - for doc_node in &doc_nodes[docs_start_index..] { - if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) { - let content = if let Some(regex) = &pattern_info.doc_strip_regex { - regex.replace_all(content, "").to_string() - } else { - content.to_string() - }; - match &mut docs { - None => docs = Some(content), - Some(d) => { - d.push('\n'); - d.push_str(&content); + // If needed, filter the doc nodes based on their ranges, selecting + // only the slice that are adjacent to some specified node. 
+ let mut docs_start_index = 0; + if let (Some(docs_adjacent_node), false) = + (docs_adjacent_node, doc_nodes.is_empty()) + { + docs_start_index = doc_nodes.len(); + let mut start_row = docs_adjacent_node.start_position().row; + while docs_start_index > 0 { + let doc_node = &doc_nodes[docs_start_index - 1]; + let prev_doc_end_row = doc_node.end_position().row; + if prev_doc_end_row + 1 >= start_row { + docs_start_index -= 1; + start_row = doc_node.start_position().row; + } else { + break; } } } - } - let range = tag_node.byte_range(); - let span = name_node.start_position()..name_node.end_position(); + // Generate a doc string from all of the doc nodes, applying any strip regexes. + let mut docs = None; + for doc_node in &doc_nodes[docs_start_index..] { + if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) + { + let content = if let Some(regex) = &pattern_info.doc_strip_regex { + regex.replace_all(content, "").to_string() + } else { + content.to_string() + }; + match &mut docs { + None => docs = Some(content), + Some(d) => { + d.push('\n'); + d.push_str(&content); + } + } + } + } - // Compute tag properties that depend on the text of the containing line. If the - // previous tag occurred on the same line, then reuse results from the previous tag. - let line_range; - let mut prev_utf16_column = 0; - let mut prev_utf8_byte = name_range.start - span.start.column; - let line_info = self.prev_line_info.as_ref().and_then(|info| { - if info.utf8_position.row == span.start.row { - Some(info) + let range = tag_node.byte_range(); + let span = name_node.start_position()..name_node.end_position(); + + // Compute tag properties that depend on the text of the containing line. If the + // previous tag occurred on the same line, then reuse results from the previous tag. 
+ let line_range; + let mut prev_utf16_column = 0; + let mut prev_utf8_byte = name_range.start - span.start.column; + let line_info = self.prev_line_info.as_ref().and_then(|info| { + if info.utf8_position.row == span.start.row { + Some(info) + } else { + None + } + }); + if let Some(line_info) = line_info { + line_range = line_info.line_range.clone(); + if line_info.utf8_position.column <= span.start.column { + prev_utf8_byte = line_info.utf8_byte; + prev_utf16_column = line_info.utf16_column; + } } else { - None - } - }); - if let Some(line_info) = line_info { - line_range = line_info.line_range.clone(); - if line_info.utf8_position.column <= span.start.column { - prev_utf8_byte = line_info.utf8_byte; - prev_utf16_column = line_info.utf16_column; + line_range = self::line_range( + self.source, + name_range.start, + span.start, + MAX_LINE_LEN, + ); } + + let utf16_start_column = prev_utf16_column + + utf16_len(&self.source[prev_utf8_byte..name_range.start]); + let utf16_end_column = + utf16_start_column + utf16_len(&self.source[name_range.clone()]); + let utf16_column_range = utf16_start_column..utf16_end_column; + + self.prev_line_info = Some(LineInfo { + utf8_position: span.end, + utf8_byte: name_range.end, + utf16_column: utf16_end_column, + line_range: line_range.clone(), + }); + tag = Tag { + line_range, + span, + utf16_column_range, + range, + name_range, + docs, + is_definition, + syntax_type_id, + }; + } else if is_ignored { + tag = Tag::ignored(name_range); } else { - line_range = self::line_range( - self.source, - name_range.start, - span.start, - MAX_LINE_LEN, - ); + continue; } - let utf16_start_column = prev_utf16_column - + utf16_len(&self.source[prev_utf8_byte..name_range.start]); - let utf16_end_column = - utf16_start_column + utf16_len(&self.source[name_range.clone()]); - let utf16_column_range = utf16_start_column..utf16_end_column; - - self.prev_line_info = Some(LineInfo { - utf8_position: span.end, - utf8_byte: name_range.end, - utf16_column: 
utf16_end_column, - line_range: line_range.clone(), - }); - let tag = Tag { - line_range, - span, - utf16_column_range, - range, - name_range, - docs, - is_definition, - syntax_type_id, - }; - // Only create one tag per node. The tag queue is sorted by node position // to allow for fast lookup. match self.tag_queue.binary_search_by_key( @@ -521,6 +544,25 @@ where } } +impl Tag { + fn ignored(name_range: Range) -> Self { + Tag { + name_range, + line_range: 0..0, + span: Point::new(0, 0)..Point::new(0, 0), + utf16_column_range: 0..0, + range: usize::MAX..usize::MAX, + docs: None, + is_definition: false, + syntax_type_id: 0, + } + } + + fn is_ignored(&self) -> bool { + self.range.start == usize::MAX + } +} + impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { From df5510acfc0561e64fc2a89fc21ec286eda4feb4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 30 Jul 2020 12:59:34 -0700 Subject: [PATCH 58/71] query: Remove limit on number of in-progress states --- lib/src/query.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 05c767e1..15827cd7 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -11,7 +11,6 @@ // #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) -#define MAX_STATE_COUNT 256 #define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 @@ -1297,8 +1296,8 @@ TSQueryCursor *ts_query_cursor_new(void) { .start_point = {0, 0}, .end_point = POINT_MAX, }; - array_reserve(&self->states, MAX_STATE_COUNT); - array_reserve(&self->finished_states, MAX_CAPTURE_LIST_COUNT); + array_reserve(&self->states, 8); + array_reserve(&self->finished_states, 8); return self; } @@ -1465,10 +1464,6 @@ static bool ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern ) { - if (self->states.size >= MAX_STATE_COUNT) { - LOG(" too many states"); - return false; - } LOG( " start state. 
pattern:%u, step:%u\n", pattern->pattern_index, @@ -1537,17 +1532,14 @@ static CaptureList *ts_query_cursor__prepare_to_capture( } // Duplicate the given state and insert the newly-created state immediately after -// the given state in the `states` array. +// the given state in the `states` array. Ensures that the given state reference is +// still valid, even if the states array is reallocated. static QueryState *ts_query_cursor__copy_state( TSQueryCursor *self, - unsigned state_index + QueryState **state_ref ) { - if (self->states.size >= MAX_STATE_COUNT) { - LOG(" too many states"); - return NULL; - } - - const QueryState *state = &self->states.contents[state_index]; + const QueryState *state = *state_ref; + uint32_t state_index = state - self->states.contents; QueryState copy = *state; copy.capture_list_id = NONE; @@ -1563,6 +1555,7 @@ static QueryState *ts_query_cursor__copy_state( } array_insert(&self->states, state_index + 1, copy); + *state_ref = &self->states.contents[state_index]; return &self->states.contents[state_index + 1]; } @@ -1774,7 +1767,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { !step->is_pattern_start && step->contains_captures ) { - if (ts_query_cursor__copy_state(self, i)) { + if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", state->pattern_index, @@ -1829,7 +1822,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query_cursor__copy_state(self, j); + QueryState *copy = ts_query_cursor__copy_state(self, &state); if (next_step->is_pass_through) { state->step_index++; j--; @@ -1862,7 +1855,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } // Enfore the longest-match criteria. 
When a query pattern contains optional or - // repeated nodes, this is necesssary to avoid multiple redundant states, where + // repeated nodes, this is necessary to avoid multiple redundant states, where // one state has a strict subset of another state's captures. bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { From 411f69d13be8954baff074f4180ae4fdb5537453 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 30 Jul 2020 13:34:34 -0700 Subject: [PATCH 59/71] query: Optimize 'longest-match' filtering --- lib/src/query.c | 85 ++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 15827cd7..c839c299 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -118,7 +118,7 @@ typedef struct { uint16_t step_index; uint16_t pattern_index; uint16_t capture_list_id; - uint16_t consumed_capture_count: 14; + uint16_t consumed_capture_count: 12; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; bool dead: 1; @@ -1860,47 +1860,54 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; + if (other_state->dead) { + array_erase(&self->states, j); + j--; + continue; + } + + // When query states are copied in order if ( - !other_state->dead && - state->pattern_index == other_state->pattern_index && - state->start_depth == other_state->start_depth - ) { - bool left_contains_right, right_contains_left; - ts_query_cursor__compare_captures( - self, - state, - other_state, - &left_contains_right, - &right_contains_left - ); - if (left_contains_right) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. 
pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, j); - j--; - continue; - } - other_state->has_in_progress_alternatives = true; + other_state->start_depth != state->start_depth || + other_state->pattern_index != state->pattern_index + ) break; + + bool left_contains_right, right_contains_left; + ts_query_cursor__compare_captures( + self, + state, + other_state, + &left_contains_right, + &right_contains_left + ); + if (left_contains_right) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, j); + j--; + continue; } - if (right_contains_left) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, i); - did_remove = true; - break; - } - state->has_in_progress_alternatives = true; + other_state->has_in_progress_alternatives = true; + } + if (right_contains_left) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. 
pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->states, i); + i--; + did_remove = true; + break; } + state->has_in_progress_alternatives = true; } } From f265e63d488d14e06d905b2ddabe879afdb62945 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 30 Jul 2020 13:35:04 -0700 Subject: [PATCH 60/71] tags: Allow def or ref node to be a sibling of the name node --- tags/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 07209e4d..c247c13e 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -459,7 +459,8 @@ where } } - let range = tag_node.byte_range(); + let rng = tag_node.byte_range(); + let range = rng.start.min(name_range.start)..rng.end.max(name_range.end); let span = name_node.start_position()..name_node.end_position(); // Compute tag properties that depend on the text of the containing line. 
If the From af655547e5817efbdf350935555b4aaf2642c618 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 31 Jul 2020 12:47:06 -0700 Subject: [PATCH 61/71] Fix handling of queries with many patterns with leading repetitions --- cli/src/tests/query_test.rs | 53 ++++++++++++++++- lib/src/query.c | 115 ++++++++++++++++++++++++++---------- 2 files changed, 135 insertions(+), 33 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 914d41cd..a377ca51 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -382,7 +382,7 @@ fn test_query_matches_with_many_overlapping_results() { ) .unwrap(); - let count = 80; + let count = 1024; // Deeply nested chained function calls: // a @@ -547,8 +547,8 @@ fn test_query_matches_with_immediate_siblings() { &[ (0, vec![("parent", "a"), ("child", "b")]), (0, vec![("parent", "b"), ("child", "c")]), - (1, vec![("last-child", "d")]), (0, vec![("parent", "c"), ("child", "d")]), + (1, vec![("last-child", "d")]), (2, vec![("first-element", "w")]), (2, vec![("first-element", "1")]), ], @@ -732,6 +732,55 @@ fn test_query_matches_with_nested_repetitions() { }); } +#[test] +fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + + // When this query sees a comment, it must keep track of several potential + // matches: up to two for each pattern that begins with a comment. 
+ let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @name)) @ref.method + + ((comment)* @doc (function_declaration)) + ((comment)* @doc (generator_function_declaration)) + ((comment)* @doc (class_declaration)) + ((comment)* @doc (lexical_declaration)) + ((comment)* @doc (variable_declaration)) + ((comment)* @doc (method_definition)) + + (comment) @comment + "#, + ) + .unwrap(); + + // Here, a series of comments occurs in the middle of a match of the first + // pattern. To avoid exceeding the storage limits and discarding that outer + // match, the comment-related matches need to be managed efficiently. + let source = format!( + "theObject\n{}\n.theMethod()", + " // the comment\n".repeat(64) + ); + + assert_query_matches( + language, + &query, + &source, + &vec![(7, vec![("comment", "// the comment")]); 64] + .into_iter() + .chain(vec![( + 0, + vec![("ref.method", source.as_str()), ("name", "theMethod")], + )]) + .collect::>(), + ); + }); +} + #[test] fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index c839c299..8c8bd4c3 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -48,7 +48,6 @@ typedef struct { uint16_t alternative_index; uint16_t depth; bool contains_captures: 1; - bool is_pattern_start: 1; bool is_immediate: 1; bool is_last_child: 1; bool is_pass_through: 1; @@ -449,7 +448,6 @@ static QueryStep query_step__new( .alternative_index = NONE, .contains_captures = false, .is_last_child = false, - .is_pattern_start = false, .is_pass_through = false, .is_dead_end = false, .is_immediate = is_immediate, @@ -547,6 +545,23 @@ static inline void ts_query__pattern_map_insert( ) { uint32_t index; ts_query__pattern_map_search(self, symbol, &index); + + // Ensure that the entries are sorted not only by symbol, but also + // by pattern_index. 
This way, states for earlier patterns will be + // initiated first, which allows the ordering of the states array + // to be maintained more efficiently. + while (index < self->pattern_map.size) { + PatternEntry *entry = &self->pattern_map.contents[index]; + if ( + self->steps.contents[entry->step_index].symbol == symbol && + entry->pattern_index < pattern_index + ) { + index++; + } else { + break; + } + } + array_insert(&self->pattern_map, index, ((PatternEntry) { .step_index = start_step_index, .pattern_index = pattern_index, @@ -1168,7 +1183,6 @@ TSQuery *ts_query_new( // Maintain a map that can look up patterns for a given root symbol. for (;;) { QueryStep *step = &self->steps.contents[start_step_index]; - step->is_pattern_start = true; ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; @@ -1178,6 +1192,7 @@ TSQuery *ts_query_new( // then add multiple entries to the pattern map. if (step->alternative_index != NONE) { start_step_index = step->alternative_index; + step->alternative_index = NONE; } else { break; } @@ -1460,27 +1475,62 @@ void ts_query_cursor__compare_captures( } } -static bool ts_query_cursor__add_state( +static void ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern ) { + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + + // Keep the states array in ascending order of start_depth and pattern_index, + // so that it can be processed more efficiently elsewhere. Usually, there is + // no work to do here because of two facts: + // * States with lower start_depth are naturally added first due to the + // order in which nodes are visited. + // * Earlier patterns are naturally added first because of the ordering of the + // pattern_map data structure that's used to initiate matches. 
+ // + // This loop is only needed in cases where two conditions hold: + // * A pattern consists of more than one sibling node, so that its states + // remain in progress after exiting the node that started the match. + // * The first node in the pattern matches against multiple nodes at the + // same depth. + // + // An example of this is the pattern '((comment)* (function))'. If multiple + // `comment` nodes appear in a row, then we may initiate a new state for this + // pattern while another state for the same pattern is already in progress. + // If there are multiple patterns like this in a query, then this loop will + // need to execute in order to keep the states ordered by pattern_index. + uint32_t index = self->states.size; + while (index > 0) { + QueryState *prev_state = &self->states.contents[index - 1]; + if (prev_state->start_depth < start_depth) break; + if (prev_state->start_depth == start_depth) { + if (prev_state->pattern_index < pattern->pattern_index) break; + if (prev_state->pattern_index == pattern->pattern_index) { + // Avoid unnecessarily inserting an unnecessary duplicate state, + // which would be immediately pruned by the longest-match criteria. + if (prev_state->step_index == pattern->step_index) return; + } + } + index--; + } + LOG( " start state. pattern:%u, step:%u\n", pattern->pattern_index, pattern->step_index ); - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - array_push(&self->states, ((QueryState) { + array_insert(&self->states, index, ((QueryState) { .capture_list_id = NONE, .step_index = pattern->step_index, .pattern_index = pattern->pattern_index, - .start_depth = self->depth - step->depth, + .start_depth = start_depth, .consumed_capture_count = 0, - .seeking_immediate_match = false, + .seeking_immediate_match = true, .has_in_progress_alternatives = false, .dead = false, })); - return true; } // Acquire a capture list for this state. 
If there are no capture lists left in the @@ -1682,7 +1732,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query_cursor__add_state(self, pattern)) break; + ts_query_cursor__add_state(self, pattern); } // Add new states for any patterns whose root node matches this node. @@ -1694,7 +1744,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query_cursor__add_state(self, pattern)) break; + ts_query_cursor__add_state(self, pattern); // Advance to the next pattern whose root node matches this node. i++; @@ -1762,11 +1812,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // parent, then this query state cannot simply be updated in place. It must be // split into two states: one that matches this node, and one which skips over // this node, to preserve the possibility of matching later siblings. - if ( - later_sibling_can_match && - !step->is_pattern_start && - step->contains_captures - ) { + if (later_sibling_can_match && step->contains_captures) { if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", @@ -1822,25 +1868,27 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query_cursor__copy_state(self, &state); if (next_step->is_pass_through) { state->step_index++; j--; } + + QueryState *copy = ts_query_cursor__copy_state(self, &state); if (copy) { - copy_count++; + LOG( + " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", + copy->pattern_index, + copy->step_index, + next_step->alternative_index, + next_step->alternative_is_immediate, + capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + ); end_index++; + copy_count++; copy->step_index = next_step->alternative_index; if (next_step->alternative_is_immediate) { copy->seeking_immediate_match = true; } - LOG( - " split state for branch. pattern:%u, step:%u, step:%u, immediate:%d\n", - copy->pattern_index, - state->step_index, - copy->step_index, - copy->seeking_immediate_match - ); } } } @@ -1860,13 +1908,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; - if (other_state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - // When query states are copied in order + // Query states are kept in ascending order of start_depth and pattern_index. + // Since the longest-match criteria is only used for deduping matches of the same + // pattern and root node, we only need to perform pairwise comparisons within a + // small slice of the states array. if ( other_state->start_depth != state->start_depth || other_state->pattern_index != state->pattern_index @@ -1914,6 +1960,13 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If there the state is at the end of its pattern, remove it from the list // of in-progress states and add it to the list of finished states. if (!did_remove) { + LOG( + " keep state. 
pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", + state->pattern_index, + state->start_depth, + state->step_index, + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size + ); QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->depth == PATTERN_DONE_MARKER) { if (state->has_in_progress_alternatives) { From 1a571ae20877c7bfac1fa59f0cc38027fe669685 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 4 Aug 2020 17:53:47 -0400 Subject: [PATCH 62/71] Add errors_present field to tagging context. --- tags/src/c_lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 07e1e19a..b93c69a2 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -52,6 +52,7 @@ pub struct TSTagsBuffer { context: TagsContext, tags: Vec, docs: Vec, + errors_present: bool, } #[no_mangle] @@ -184,6 +185,7 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { context: TagsContext::new(), tags: Vec::with_capacity(64), docs: Vec::with_capacity(64), + errors_present: false, })) } @@ -216,6 +218,12 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { buffer.docs.len() as u32 } +#[no_mangle] +pub extern "C" fn ts_tagger_errors_present(this: *const TSTagsBuffer) -> bool { + let buffer = unwrap_ptr(this); + buffer.errors_present +} + #[no_mangle] pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( this: *mut TSTagger, From 5a52dc2cd700170196753481db1e8aa261e50d50 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 11:18:59 -0400 Subject: [PATCH 63/71] Return an iterator-bool tuple instead of just an iterator. 
--- cli/src/tags.rs | 3 ++- tags/src/c_lib.rs | 5 ++++- tags/src/lib.rs | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 5ea00f39..5e999693 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -53,7 +53,8 @@ pub fn generate_tags( let source = fs::read(path)?; let t0 = Instant::now(); - for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? { + let (tagged, _) = context.generate_tags(tags_config, &source, Some(&cancellation_flag))?; + for tag in tagged { let tag = tag?; if !quiet { write!( diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index b93c69a2..84f8c97b 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -126,7 +126,10 @@ pub extern "C" fn ts_tagger_tag( .context .generate_tags(config, source_code, cancellation_flag) { - Ok(tags) => tags, + Ok((tags, found_error)) => { + buffer.errors_present = found_error; + tags + } Err(e) => { return match e { Error::InvalidLanguage => TSTagsError::InvalidLanguage, diff --git a/tags/src/lib.rs b/tags/src/lib.rs index c247c13e..dd55d4be 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -255,7 +255,7 @@ impl TagsContext { config: &'a TagsConfiguration, source: &'a [u8], cancellation_flag: Option<&'a AtomicUsize>, - ) -> Result> + 'a, Error> { + ) -> Result<(impl Iterator> + 'a, bool), Error> { self.parser .set_language(config.language) .map_err(|_| Error::InvalidLanguage)?; @@ -271,7 +271,7 @@ impl TagsContext { .matches(&config.query, tree_ref.root_node(), move |node| { &source[node.byte_range()] }); - Ok(TagsIter { + Ok((TagsIter { _tree: tree, matches, source, @@ -285,7 +285,7 @@ impl TagsContext { inherits: false, local_defs: Vec::new(), }], - }) + }, tree_ref.root_node().has_error())) } } From f4108056b0b5be57441493a279cb22fc3fd95829 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 11:33:04 -0400 Subject: [PATCH 64/71] Remove otiose pattern match. 
--- cli/src/tags.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 5e999693..122b58d2 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -53,8 +53,7 @@ pub fn generate_tags( let source = fs::read(path)?; let t0 = Instant::now(); - let (tagged, _) = context.generate_tags(tags_config, &source, Some(&cancellation_flag))?; - for tag in tagged { + for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))?.0 { let tag = tag?; if !quiet { write!( From 5c86a9c654b7f2be39f55039ad114f277aa64a64 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 11:52:07 -0400 Subject: [PATCH 65/71] Fix the tests --- cli/src/tests/tags_test.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 3ff1c92b..88e57ec1 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -102,6 +102,7 @@ fn test_tags_python() { let tags = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -153,6 +154,7 @@ fn test_tags_javascript() { let tags = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -189,6 +191,7 @@ fn test_tags_columns_measured_in_utf16_code_units() { let tag = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .next() .unwrap() .unwrap(); @@ -229,6 +232,7 @@ fn test_tags_ruby() { let tags = tag_context .generate_tags(&tags_config, source.as_bytes(), None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -271,7 +275,7 @@ fn test_tags_cancellation() { .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag)) .unwrap(); - for (i, tag) in tags.enumerate() { + for (i, tag) in tags.0.enumerate() { if i == 150 { cancellation_flag.store(1, Ordering::SeqCst); } From 32f69dbe156030de5ae589d968efc2825bd0485f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 Aug 2020 
09:06:00 -0700 Subject: [PATCH 66/71] tags, highlight: Limit the size of buffers that are retained in memory --- highlight/src/lib.rs | 16 ++++++++++++---- tags/src/c_lib.rs | 12 ++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index bb110219..1cffefa2 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -10,6 +10,8 @@ use tree_sitter::{ }; const CANCELLATION_CHECK_INTERVAL: usize = 100; +const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024; +const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; /// Indicates which highlight should be applied to a region of source code. #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -884,11 +886,13 @@ where impl HtmlRenderer { pub fn new() -> Self { - HtmlRenderer { - html: Vec::new(), - line_offsets: vec![0], + let mut result = HtmlRenderer { + html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY), + line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY), carriage_return_highlight: None, - } + }; + result.line_offsets.push(0); + result } pub fn set_carriage_return_highlight(&mut self, highlight: Option) { @@ -896,6 +900,10 @@ impl HtmlRenderer { } pub fn reset(&mut self) { + self.html.truncate(BUFFER_HTML_RESERVE_CAPACITY); + self.line_offsets.truncate(BUFFER_LINES_RESERVE_CAPACITY); + self.html.shrink_to_fit(); + self.line_offsets.shrink_to_fit(); self.html.clear(); self.line_offsets.clear(); self.line_offsets.push(0); diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 07e1e19a..c2bec6ca 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -6,6 +6,9 @@ use std::sync::atomic::AtomicUsize; use std::{fmt, slice, str}; use tree_sitter::Language; +const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100; +const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024; + #[repr(C)] #[derive(Debug, PartialEq, Eq)] pub enum TSTagsError { @@ -116,8 +119,13 @@ pub extern "C" fn ts_tagger_tag( let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) 
}; if let Some(config) = tagger.languages.get(scope_name) { + buffer.tags.truncate(BUFFER_TAGS_RESERVE_CAPACITY); + buffer.docs.truncate(BUFFER_DOCS_RESERVE_CAPACITY); + buffer.tags.shrink_to_fit(); + buffer.docs.shrink_to_fit(); buffer.tags.clear(); buffer.docs.clear(); + let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) }; let cancellation_flag = unsafe { cancellation_flag.as_ref() }; @@ -182,8 +190,8 @@ pub extern "C" fn ts_tagger_tag( pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { Box::into_raw(Box::new(TSTagsBuffer { context: TagsContext::new(), - tags: Vec::with_capacity(64), - docs: Vec::with_capacity(64), + tags: Vec::with_capacity(BUFFER_TAGS_RESERVE_CAPACITY), + docs: Vec::with_capacity(BUFFER_DOCS_RESERVE_CAPACITY), })) } From 94ab884ee4d0b965c8c16212979e15927976f068 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 12:16:09 -0400 Subject: [PATCH 67/71] Add a test. --- cli/src/tests/tags_test.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 88e57ec1..2b058c0b 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -297,6 +297,39 @@ fn test_invalid_capture() { assert_eq!(e, Error::InvalidCapture("method".to_string())); } +#[test] +fn test_tags_with_parse_error() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Fine: pass + class Bad + "#; + + let (tags, failed) = tag_context + .generate_tags(&tags_config, source, None) + .unwrap(); + + let newtags = tags.collect::, _>>().unwrap(); + + assert!(failed, "syntax error should have been detected"); + + assert_eq!( + newtags.iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Fine", "class"), 
+ ] + ); +} + + #[test] fn test_tags_via_c_api() { allocations::record(|| { From 7576b0b4485343902f54ab1dbe0464dd7ef4f920 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 12:21:42 -0400 Subject: [PATCH 68/71] Add accessor to the C header. --- tags/include/tree_sitter/tags.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index f2b17075..42109bee 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -88,6 +88,9 @@ uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); // Get the syntax kinds for a scope. const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); +// Determine whether a parse error was encountered while tagging. +bool ts_tagger_errors_present(); + #ifdef __cplusplus } #endif From ec6af791af5761130238134e935ad6236aeb151c Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 12:24:39 -0400 Subject: [PATCH 69/71] Bikeshed this name a little bit. --- tags/include/tree_sitter/tags.h | 2 +- tags/src/c_lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 42109bee..773113d7 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -89,7 +89,7 @@ uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); // Determine whether a parse error was encountered while tagging. 
-bool ts_tagger_errors_present(); +bool ts_tags_buffer_found_parse_error(); #ifdef __cplusplus } diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 84f8c97b..8cb5abb4 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -222,7 +222,7 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { } #[no_mangle] -pub extern "C" fn ts_tagger_errors_present(this: *const TSTagsBuffer) -> bool { +pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { let buffer = unwrap_ptr(this); buffer.errors_present } From f91b19c08947aad20e095a4103cf144794baf16d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 Aug 2020 09:57:45 -0700 Subject: [PATCH 70/71] tags, highlight: Avoid completely deallocating buffers when shrinking --- highlight/src/lib.rs | 16 ++++++++++------ tags/src/c_lib.rs | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 1cffefa2..e4aebbfb 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -900,12 +900,8 @@ impl HtmlRenderer { } pub fn reset(&mut self) { - self.html.truncate(BUFFER_HTML_RESERVE_CAPACITY); - self.line_offsets.truncate(BUFFER_LINES_RESERVE_CAPACITY); - self.html.shrink_to_fit(); - self.line_offsets.shrink_to_fit(); - self.html.clear(); - self.line_offsets.clear(); + shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY); + shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY); self.line_offsets.push(0); } @@ -1069,3 +1065,11 @@ fn injection_for_match<'a>( (language_name, content_node, include_children) } + +fn shrink_and_clear(vec: &mut Vec, capacity: usize) { + if vec.len() > capacity { + vec.truncate(capacity); + vec.shrink_to_fit(); + } + vec.clear(); +} diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index c2bec6ca..b0786580 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -119,12 +119,8 @@ pub extern "C" fn ts_tagger_tag( let scope_name = 
unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; if let Some(config) = tagger.languages.get(scope_name) { - buffer.tags.truncate(BUFFER_TAGS_RESERVE_CAPACITY); - buffer.docs.truncate(BUFFER_DOCS_RESERVE_CAPACITY); - buffer.tags.shrink_to_fit(); - buffer.docs.shrink_to_fit(); - buffer.tags.clear(); - buffer.docs.clear(); + shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY); + shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY); let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) }; let cancellation_flag = unsafe { cancellation_flag.as_ref() }; @@ -262,3 +258,11 @@ fn unwrap(result: Result) -> T { abort(); }) } + +fn shrink_and_clear(vec: &mut Vec, capacity: usize) { + if vec.len() > capacity { + vec.truncate(capacity); + vec.shrink_to_fit(); + } + vec.clear(); +} From 8d58a0d33a070af73dd6548d8000e0e7ddd04331 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 13:10:02 -0400 Subject: [PATCH 71/71] Add parameter in the header. --- tags/include/tree_sitter/tags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 773113d7..4784abbb 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -89,7 +89,7 @@ uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); // Determine whether a parse error was encountered while tagging. -bool ts_tags_buffer_found_parse_error(); +bool ts_tags_buffer_found_parse_error(const TSTagsBuffer*); #ifdef __cplusplus }