From fa199e3a1a1f300e6acabe3546e92ba180167f65 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:02 -0700 Subject: [PATCH 01/32] Allow most tags to be arbitrarily named, remove hardcoded kinds --- tags/src/lib.rs | 104 ++++++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 69 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8d1853bb..296ac9ba 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -4,7 +4,8 @@ use memchr::{memchr, memrchr}; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{fmt, mem, str}; +use std::{mem, str}; +use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; @@ -18,12 +19,8 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, - call_capture_index: Option, - class_capture_index: Option, + capture_map: HashMap, doc_capture_index: Option, - function_capture_index: Option, - method_capture_index: Option, - module_capture_index: Option, name_capture_index: Option, local_scope_capture_index: Option, local_definition_capture_index: Option, @@ -38,21 +35,13 @@ pub struct TagsContext { #[derive(Debug, Clone)] pub struct Tag { - pub kind: TagKind, pub range: Range, pub name_range: Range, pub line_range: Range, pub span: Range, pub docs: Option, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum TagKind { - Function, - Method, - Class, - Module, - Call, + pub is_definition: bool, + pub kind: String, } #[derive(Debug, PartialEq)] @@ -111,29 +100,23 @@ impl TagsConfiguration { } } - let mut call_capture_index = None; - let mut class_capture_index = None; + let mut capture_map: HashMap = HashMap::new(); let mut doc_capture_index = None; - let mut function_capture_index = None; - let mut method_capture_index = None; - let mut module_capture_index = None; let mut name_capture_index = 
None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { - let index = match name.as_str() { - "call" => &mut call_capture_index, - "class" => &mut class_capture_index, - "doc" => &mut doc_capture_index, - "function" => &mut function_capture_index, - "method" => &mut method_capture_index, - "module" => &mut module_capture_index, - "name" => &mut name_capture_index, - "local.scope" => &mut local_scope_capture_index, - "local.definition" => &mut local_definition_capture_index, - _ => continue, - }; - *index = Some(i as u32); + match name.as_str() { + "" => continue, + "name" => name_capture_index = Some(i as u32), + "doc" => doc_capture_index = Some(i as u32), + "local.scope" => local_scope_capture_index = Some(i as u32), + "local.definition" => local_definition_capture_index = Some(i as u32), + _ => { + capture_map.insert(i as u32, name.to_string()); + continue; + } + } } let pattern_info = (0..query.pattern_count()) @@ -180,12 +163,8 @@ impl TagsConfiguration { Ok(TagsConfiguration { language, query, - function_capture_index, - class_capture_index, - method_capture_index, - module_capture_index, + capture_map, doc_capture_index, - call_capture_index, name_capture_index, tags_pattern_index, local_scope_capture_index, @@ -303,7 +282,8 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut kind = TagKind::Call; + let mut kind = "unknown"; + let mut is_definition = false; let mut docs_adjacent_node = None; for capture in mat.captures { @@ -317,21 +297,18 @@ where name_range = Some(capture.node.byte_range()); } else if index == self.config.doc_capture_index { doc_nodes.push(capture.node); - } else if index == self.config.call_capture_index { + } + + if let Some(name) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - kind = TagKind::Call; - } else if index == self.config.class_capture_index { - 
tag_node = Some(capture.node); - kind = TagKind::Class; - } else if index == self.config.function_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Function; - } else if index == self.config.method_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Method; - } else if index == self.config.module_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Module; + kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + name + } } } @@ -414,10 +391,11 @@ where *tag = Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), span: tag_node.start_position()..tag_node.end_position(), - kind, range, name_range, docs, + kind: kind.to_string(), + is_definition, }; } } @@ -427,10 +405,11 @@ where Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), span: tag_node.start_position()..tag_node.end_position(), - kind, range, name_range, docs, + kind: kind.to_string(), + is_definition, }, mat.pattern_index, ), @@ -448,19 +427,6 @@ where } } -impl fmt::Display for TagKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - TagKind::Call => "Call", - TagKind::Module => "Module", - TagKind::Class => "Class", - TagKind::Method => "Method", - TagKind::Function => "Function", - } - .fmt(f) - } -} - impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 8d7459ed578b8f66bde36624c3f91e40d54d79a2 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:13 -0700 Subject: [PATCH 02/32] Bring c_lib inline --- tags/src/c_lib.rs | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 0c367977..c8ca8ed5 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, TagKind, TagsConfiguration, 
TagsContext}; +use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,16 +19,6 @@ pub enum TSTagsError { Unknown, } -#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TSTagKind { - Function, - Method, - Class, - Module, - Call, -} - #[repr(C)] pub struct TSPoint { row: u32, @@ -37,7 +27,6 @@ pub struct TSPoint { #[repr(C)] pub struct TSTag { - pub kind: TSTagKind, pub start_byte: u32, pub end_byte: u32, pub name_start_byte: u32, @@ -48,6 +37,8 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, + pub kind: String, + pub is_definition: bool, } pub struct TSTagger { @@ -153,13 +144,6 @@ pub extern "C" fn ts_tagger_tag( buffer.docs.extend_from_slice(docs.as_bytes()); } buffer.tags.push(TSTag { - kind: match tag.kind { - TagKind::Function => TSTagKind::Function, - TagKind::Method => TSTagKind::Method, - TagKind::Class => TSTagKind::Class, - TagKind::Module => TSTagKind::Module, - TagKind::Call => TSTagKind::Call, - }, start_byte: tag.range.start as u32, end_byte: tag.range.end as u32, name_start_byte: tag.name_range.start as u32, @@ -176,6 +160,8 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, + kind: tag.kind, + is_definition: tag.is_definition, }); } From 9bf4939b9a1093f6c42d0bdcf268fef8a4e04d8f Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:22 -0700 Subject: [PATCH 03/32] Show if tag is a def/ref in the cli --- cli/src/tags.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index d6704ec5..6308d396 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -42,9 +42,10 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> let tag = tag?; write!( &mut stdout, - " {:<8} {:<40}\t{:>9}-{:<9}", + " {:<8} {:<40}\t [{}] {:>9}-{:<9}", tag.kind, 
str::from_utf8(&source[tag.name_range]).unwrap_or(""), + if tag.is_definition { "definition" } else { "reference" }, tag.span.start, tag.span.end, )?; From d802b3779145d833dc16e3e075f8e34dd684504a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 17:09:34 -0700 Subject: [PATCH 04/32] Bring back a SyntaxType enum --- cli/src/tags.rs | 2 +- tags/src/c_lib.rs | 28 ++++++++++++-- tags/src/lib.rs | 98 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 105 insertions(+), 23 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 6308d396..06f4f4fa 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -43,7 +43,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> write!( &mut stdout, " {:<8} {:<40}\t [{}] {:>9}-{:<9}", - tag.kind, + tag.syntax_type, str::from_utf8(&source[tag.name_range]).unwrap_or(""), if tag.is_definition { "definition" } else { "reference" }, tag.span.start, diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index c8ca8ed5..72c708d0 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, TagsConfiguration, TagsContext}; +use super::{Error, SyntaxType, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,6 +19,19 @@ pub enum TSTagsError { Unknown, } +#[repr(C)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TSSyntaxType { + Function, + Method, + Class, + Module, + Call, + Type, + Interface, + Implementation, +} + #[repr(C)] pub struct TSPoint { row: u32, @@ -37,7 +50,7 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, - pub kind: String, + pub syntax_type: TSSyntaxType, pub is_definition: bool, } @@ -160,7 +173,16 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, - kind: tag.kind, + syntax_type: match tag.syntax_type { + SyntaxType::Function => 
TSSyntaxType::Function, + SyntaxType::Method => TSSyntaxType::Method, + SyntaxType::Class => TSSyntaxType::Class, + SyntaxType::Module => TSSyntaxType::Module, + SyntaxType::Call => TSSyntaxType::Call, + SyntaxType::Type => TSSyntaxType::Type, + SyntaxType::Interface => TSSyntaxType::Interface, + SyntaxType::Implementation => TSSyntaxType::Implementation, + }, is_definition: tag.is_definition, }); } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 296ac9ba..e6179b8b 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -4,7 +4,7 @@ use memchr::{memchr, memrchr}; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{mem, str}; +use std::{fmt, mem, str}; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -19,7 +19,7 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, - capture_map: HashMap, + capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, local_scope_capture_index: Option, @@ -28,6 +28,27 @@ pub struct TagsConfiguration { pattern_info: Vec, } + +#[derive(Debug)] +pub struct NamedCapture { + pub syntax_type: SyntaxType, + pub is_definition: bool, +} + +// Should stay in sync with list of valid syntax types in semantic. 
+// See: https://github.com/github/semantic/blob/621696f5bc523a651f1cf9fc2ac58c557ea02d07/proto/semantic.proto#L165-L174 +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum SyntaxType { + Function, + Method, + Class, + Module, + Call, + Type, + Interface, + Implementation, +} + pub struct TagsContext { parser: Parser, cursor: QueryCursor, @@ -41,7 +62,7 @@ pub struct Tag { pub span: Range, pub docs: Option, pub is_definition: bool, - pub kind: String, + pub syntax_type: SyntaxType, } #[derive(Debug, PartialEq)] @@ -100,7 +121,7 @@ impl TagsConfiguration { } } - let mut capture_map: HashMap = HashMap::new(); + let mut capture_map: HashMap = HashMap::new(); let mut doc_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; @@ -112,9 +133,8 @@ impl TagsConfiguration { "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - _ => { - capture_map.insert(i as u32, name.to_string()); - continue; + _ => if let Some(nc) = NamedCapture::new(name) { + capture_map.insert(i as u32, nc); } } } @@ -282,7 +302,7 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut kind = "unknown"; + let mut syntax_type = SyntaxType::Function; let mut is_definition = false; let mut docs_adjacent_node = None; @@ -299,16 +319,18 @@ where doc_nodes.push(capture.node); } - if let Some(name) = self.config.capture_map.get(&capture.index) { + if let Some(named_capture) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - kind = if name.starts_with("definition.") { - is_definition = true; - name.trim_start_matches("definition.") - } else if name.starts_with("reference.") { - name.trim_start_matches("reference.") - } else { - name - } + syntax_type = named_capture.syntax_type; + is_definition = named_capture.is_definition; + // kind = if 
name.starts_with("definition.") { + // is_definition = true; + // name.trim_start_matches("definition.") + // } else if name.starts_with("reference.") { + // name.trim_start_matches("reference.") + // } else { + // name + // } } } @@ -394,7 +416,7 @@ where range, name_range, docs, - kind: kind.to_string(), + syntax_type, is_definition, }; } @@ -408,7 +430,7 @@ where range, name_range, docs, - kind: kind.to_string(), + syntax_type, is_definition, }, mat.pattern_index, @@ -427,6 +449,44 @@ where } } +impl NamedCapture { + pub fn new(name: &String) -> Option { + let mut is_definition = false; + + let kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + name + }; + + let syntax_type = match kind.as_ref() { + "function" => {is_definition = true; SyntaxType::Function}, + "method" => {is_definition = true; SyntaxType::Method}, + "class" => SyntaxType::Class, + "module" => SyntaxType::Module, + "call" => SyntaxType::Call, + "type" => SyntaxType::Type, + "interface" => SyntaxType::Interface, + "implementation" => SyntaxType::Implementation, + _ => return None, + }; + + return Some(NamedCapture{ + syntax_type, + is_definition + }) + } +} + +impl fmt::Display for SyntaxType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 80f5c522594de99d487aa12a756f369ae48372a3 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 17:19:35 -0700 Subject: [PATCH 05/32] Tests compile --- cli/src/tests/tags_test.rs | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index fad8ebd8..b6283507 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -3,7 +3,7 @@ use 
super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, SyntaxType, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( @@ -99,12 +99,12 @@ fn test_tags_python() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.syntax_type)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("age", TagKind::Function), - ("compute_age", TagKind::Call), + ("Customer", SyntaxType::Class), + ("age", SyntaxType::Function), + ("compute_age", SyntaxType::Call), ] ); @@ -150,12 +150,12 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.syntax_type)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ("Customer", SyntaxType::Class), + ("getAge", SyntaxType::Method), + ("Agent", SyntaxType::Class) ] ); assert_eq!( @@ -204,18 +204,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.kind, + t.syntax_type, (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), - ("bar", TagKind::Call, (7, 4)), - ("a", TagKind::Call, (7, 8)), - ("b", TagKind::Call, (7, 11)), - ("each", TagKind::Call, (9, 14)), - ("baz", TagKind::Call, (13, 8)), - ("b", TagKind::Call, (13, 15),), + ("foo", SyntaxType::Method, (2, 0)), + ("bar", SyntaxType::Call, (7, 4)), + ("a", SyntaxType::Call, (7, 8)), + ("b", SyntaxType::Call, (7, 11)), + ("each", SyntaxType::Call, (9, 14)), + ("baz", SyntaxType::Call, (13, 8)), + ("b", SyntaxType::Call, (13, 15),), ] ); } @@ -319,7 +319,7 @@ fn test_tags_via_c_api() { assert_eq!( tags.iter() .map(|tag| ( - tag.kind, + tag.syntax_type, 
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], @@ -327,18 +327,18 @@ fn test_tags_via_c_api() { .collect::>(), &[ ( - c::TSTagKind::Function, + c::TSSyntaxType::Function, "b", "function b() {", "one\ntwo\nthree" ), ( - c::TSTagKind::Class, + c::TSSyntaxType::Class, "C", "class C extends D {", "four\nfive" ), - (c::TSTagKind::Call, "b", "b(a);", "") + (c::TSSyntaxType::Call, "b", "b(a);", "") ] ); From 929bb40adcb3678b3a229a272222bd3edab62ecf Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:34:55 -0700 Subject: [PATCH 06/32] Shorten to def/ref --- cli/src/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 06f4f4fa..4869b8cc 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -45,7 +45,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> " {:<8} {:<40}\t [{}] {:>9}-{:<9}", tag.syntax_type, str::from_utf8(&source[tag.name_range]).unwrap_or(""), - if tag.is_definition { "definition" } else { "reference" }, + if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, )?; From c08333e0cdbf0cb47253abe1eb856f3f80e4a9ea Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:35:07 -0700 Subject: [PATCH 07/32] Defer to debug formatting take 2 --- tags/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index e6179b8b..dd74f833 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -483,7 +483,7 @@ impl NamedCapture { impl fmt::Display for SyntaxType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) + format!("{:?}", self).fmt(f) } } From 3e8bf9daceb19c64cf3e84530d62594729000d1a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:35:16 -0700 Subject: [PATCH 
08/32] These are always definitions --- tags/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index dd74f833..991d3cb5 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -465,8 +465,8 @@ impl NamedCapture { let syntax_type = match kind.as_ref() { "function" => {is_definition = true; SyntaxType::Function}, "method" => {is_definition = true; SyntaxType::Method}, - "class" => SyntaxType::Class, - "module" => SyntaxType::Module, + "class" => {is_definition = true; SyntaxType::Class}, + "module" => {is_definition = true; SyntaxType::Module}, "call" => SyntaxType::Call, "type" => SyntaxType::Type, "interface" => SyntaxType::Interface, From 30132c682b22b57d7f42883f2cb8480691182551 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 14:12:14 -0700 Subject: [PATCH 09/32] Bring tags.h inline --- tags/include/tree_sitter/tags.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 946dc6f1..e1ed68bd 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -19,15 +19,17 @@ typedef enum { } TSTagsError; typedef enum { - TSTagKindFunction, - TSTagKindMethod, - TSTagKindClass, - TSTagKindModule, - TSTagKindCall, -} TSTagKind; + TSSyntaxTypeFunction, + TSSyntaxTypeMethod, + TSSyntaxTypeClass, + TSSyntaxTypeModule, + TSSyntaxTypeCall, + TSSyntaxTypeType, + TSSyntaxTypeInterface, + TSSyntaxTypeImplementation, +} TSTagSyntaxType; typedef struct { - TSTagKind kind; uint32_t start_byte; uint32_t end_byte; uint32_t name_start_byte; @@ -38,6 +40,8 @@ typedef struct { TSPoint end_point; uint32_t docs_start_byte; uint32_t docs_end_byte; + TSTagSyntaxType syntax_type; + bool is_definition; } TSTag; typedef struct TSTagger TSTagger; From 15202d0b382a083ffa7d3019eec9348c5c35c7d9 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:11:31 -0700 Subject: [PATCH 
10/32] Remove commented code --- tags/src/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 991d3cb5..8cd73457 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -323,14 +323,6 @@ where tag_node = Some(capture.node); syntax_type = named_capture.syntax_type; is_definition = named_capture.is_definition; - // kind = if name.starts_with("definition.") { - // is_definition = true; - // name.trim_start_matches("definition.") - // } else if name.starts_with("reference.") { - // name.trim_start_matches("reference.") - // } else { - // name - // } } } From 3c39b016a4c538d645a7e0f5bdfd476e4588afd9 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:11:42 -0700 Subject: [PATCH 11/32] Trim whitespace from tag source lines --- tags/src/lib.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8cd73457..32eaa0d9 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -495,7 +495,16 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); let max_line_len = max_line_len.min(text.len() - start); let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - start..end + trim_start(text, start..end) +} + +fn trim_start(text: &[u8], r: Range) -> Range { + for (index, c) in text[r.start..r.end].iter().enumerate() { + if !c.is_ascii_whitespace(){ + return index..r.end + } + } + return r } #[cfg(test)] @@ -514,4 +523,13 @@ mod tests { assert_eq!(line_range(text, 5, 10), 4..8); assert_eq!(line_range(text, 11, 10), 9..14); } + + #[test] + fn test_get_line_trims() { + let text = b" foo\nbar\n"; + assert_eq!(line_range(text, 0, 10), 3..6); + + let text = b"\t func foo\nbar\n"; + assert_eq!(line_range(text, 0, 10), 2..10); + } } From 7b2514a6108593f9da31b4bb6638a145bfa77b51 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 
17 Jun 2020 15:12:16 -0700 Subject: [PATCH 12/32] Whitespace --- tags/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 32eaa0d9..d0746b3d 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -28,7 +28,6 @@ pub struct TagsConfiguration { pattern_info: Vec, } - #[derive(Debug)] pub struct NamedCapture { pub syntax_type: SyntaxType, From 819b800cf973418c7dbd73e628ae26401d618580 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:54:29 -0700 Subject: [PATCH 13/32] Pick up the proper initial index and test --- tags/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index d0746b3d..d57e3fb5 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -500,7 +500,7 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { fn trim_start(text: &[u8], r: Range) -> Range { for (index, c) in text[r.start..r.end].iter().enumerate() { if !c.is_ascii_whitespace(){ - return index..r.end + return (r.start+index)..r.end } } return r @@ -530,5 +530,6 @@ mod tests { let text = b"\t func foo\nbar\n"; assert_eq!(line_range(text, 0, 10), 2..10); + assert_eq!(line_range(text, 11, 10), 11..14); } } From f24a952cb48706cf3134ad8da505462098b65348 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:54:36 -0700 Subject: [PATCH 14/32] Minor output changes --- cli/src/tags.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 4869b8cc..3493f616 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -34,20 +34,27 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? 
{ - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + let ident = if paths.len() > 1 { + let path_str = format!("{:?}", path); + writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + "\t" + } else { + "" + }; let source = fs::read(path)?; for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? { let tag = tag?; write!( &mut stdout, - " {:<8} {:<40}\t [{}] {:>9}-{:<9}", - tag.syntax_type, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + ident, str::from_utf8(&source[tag.name_range]).unwrap_or(""), + tag.syntax_type, if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), )?; if let Some(docs) = tag.docs { if docs.len() > 120 { From 016ad53a2f4f5a79ef4164eaf57a13e5147eb53a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 07:40:48 -0700 Subject: [PATCH 15/32] Trim end of lines as well --- tags/src/lib.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index d57e3fb5..1959c753 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -494,18 +494,27 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); let max_line_len = max_line_len.min(text.len() - start); let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - trim_start(text, start..end) + trim_end(text, trim_start(text, start..end)) } fn trim_start(text: &[u8], r: Range) -> Range { for (index, c) in text[r.start..r.end].iter().enumerate() { - if !c.is_ascii_whitespace(){ + if !c.is_ascii_whitespace() { return (r.start+index)..r.end } } return r } +fn trim_end(text: &[u8], r: Range) -> Range { + for (index, c) in text[r.start..r.end].iter().rev().enumerate() { + if !c.is_ascii_whitespace() { + return r.start..(r.end-index) + } + } + 
return r +} + #[cfg(test)] mod tests { use super::*; @@ -528,8 +537,15 @@ mod tests { let text = b" foo\nbar\n"; assert_eq!(line_range(text, 0, 10), 3..6); - let text = b"\t func foo\nbar\n"; + let text = b"\t func foo \nbar\n"; assert_eq!(line_range(text, 0, 10), 2..10); - assert_eq!(line_range(text, 11, 10), 11..14); + + let r = line_range(text, 0, 14); + assert_eq!(r, 2..10); + assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo"); + + let r = line_range(text, 12, 14); + assert_eq!(r, 12..15); + assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar"); } } From 3bcb1f8c9405f77242a0c2f46dabfe4c8e59b53d Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 10:48:33 -0700 Subject: [PATCH 16/32] Assert line trimming --- cli/src/tests/tags_test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index b6283507..02d06ff6 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -108,10 +108,10 @@ fn test_tags_python() { ] ); - assert_eq!(substr(source, &tags[0].line_range), " class Customer:"); + assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); assert_eq!( substr(source, &tags[1].line_range), - " def age(self):" + "def age(self):" ); assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); From 54586c4e5bf5536bf075558b0529f4518f348676 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:42:30 -0700 Subject: [PATCH 17/32] Named captures are dynamic New c api for getting list of syntax_type names. 
--- tags/include/tree_sitter/tags.h | 16 ++--- tags/src/c_lib.rs | 46 +++++++------- tags/src/lib.rs | 105 +++++++++++++------------------- 3 files changed, 69 insertions(+), 98 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index e1ed68bd..f6113a0f 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -18,17 +18,6 @@ typedef enum { TSTagsInvalidQuery, } TSTagsError; -typedef enum { - TSSyntaxTypeFunction, - TSSyntaxTypeMethod, - TSSyntaxTypeClass, - TSSyntaxTypeModule, - TSSyntaxTypeCall, - TSSyntaxTypeType, - TSSyntaxTypeInterface, - TSSyntaxTypeImplementation, -} TSTagSyntaxType; - typedef struct { uint32_t start_byte; uint32_t end_byte; @@ -40,7 +29,7 @@ typedef struct { TSPoint end_point; uint32_t docs_start_byte; uint32_t docs_end_byte; - TSTagSyntaxType syntax_type; + uint32_t syntax_type_id; bool is_definition; } TSTag; @@ -93,6 +82,9 @@ uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *); const char *ts_tags_buffer_docs(const TSTagsBuffer *); uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); +// Get the syntax kinds for a scope. 
+const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); + #ifdef __cplusplus } #endif diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 72c708d0..6dc48195 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, SyntaxType, TagsConfiguration, TagsContext}; +use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,19 +19,6 @@ pub enum TSTagsError { Unknown, } -#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TSSyntaxType { - Function, - Method, - Class, - Module, - Call, - Type, - Interface, - Implementation, -} - #[repr(C)] pub struct TSPoint { row: u32, @@ -50,7 +37,7 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, - pub syntax_type: TSSyntaxType, + pub syntax_type_id: u32, pub is_definition: bool, } @@ -173,16 +160,7 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, - syntax_type: match tag.syntax_type { - SyntaxType::Function => TSSyntaxType::Function, - SyntaxType::Method => TSSyntaxType::Method, - SyntaxType::Class => TSSyntaxType::Class, - SyntaxType::Module => TSSyntaxType::Module, - SyntaxType::Call => TSSyntaxType::Call, - SyntaxType::Type => TSSyntaxType::Type, - SyntaxType::Interface => TSSyntaxType::Interface, - SyntaxType::Implementation => TSSyntaxType::Implementation, - }, + syntax_type_id: tag.syntax_type_id, is_definition: tag.is_definition, }); } @@ -231,6 +209,24 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { buffer.docs.len() as u32 } +#[no_mangle] +pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( + this: *mut TSTagger, + scope_name: *const i8, + len: *mut u32, +) -> *const *const i8 { + let tagger = unwrap_mut_ptr(this); + let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; + let 
len = unwrap_mut_ptr(len); + + *len = 0; + if let Some(config) = tagger.languages.get(scope_name) { + *len = config.c_syntax_type_names.len() as u32; + return config.c_syntax_type_names.as_ptr() as *const *const i8 + } + std::ptr::null() +} + fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { unsafe { result.as_ref() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 1959c753..3d5ce770 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,6 +5,7 @@ use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{fmt, mem, str}; +use std::ffi::CStr; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -19,6 +20,8 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, + syntax_type_names: Vec>, + c_syntax_type_names: Vec<*const u8>, capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, @@ -30,24 +33,10 @@ pub struct TagsConfiguration { #[derive(Debug)] pub struct NamedCapture { - pub syntax_type: SyntaxType, + pub syntax_type_id: u32, pub is_definition: bool, } -// Should stay in sync with list of valid syntax types in semantic. 
-// See: https://github.com/github/semantic/blob/621696f5bc523a651f1cf9fc2ac58c557ea02d07/proto/semantic.proto#L165-L174 -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum SyntaxType { - Function, - Method, - Class, - Module, - Call, - Type, - Interface, - Implementation, -} - pub struct TagsContext { parser: Parser, cursor: QueryCursor, @@ -61,7 +50,7 @@ pub struct Tag { pub span: Range, pub docs: Option, pub is_definition: bool, - pub syntax_type: SyntaxType, + pub syntax_type_id: u32, } #[derive(Debug, PartialEq)] @@ -70,6 +59,7 @@ pub enum Error { Regex(regex::Error), Cancelled, InvalidLanguage, + InvalidCapture(String), } #[derive(Debug, Default)] @@ -120,11 +110,13 @@ impl TagsConfiguration { } } - let mut capture_map: HashMap = HashMap::new(); + let mut capture_map = HashMap::new(); + let mut syntax_type_names = Vec::new(); let mut doc_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; + let mut syntax_type_id = 0; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, @@ -132,12 +124,32 @@ impl TagsConfiguration { "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - _ => if let Some(nc) = NamedCapture::new(name) { - capture_map.insert(i as u32, nc); + "local.reference" => continue, + _ => { + let mut is_definition = false; + + let kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + return Err(Error::InvalidCapture(name.to_string())) + }.to_string()+"\0"; + + capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); + syntax_type_id+=1; + if let Ok(cstr) = CStr::from_bytes_with_nul(kind.as_bytes()) { + 
syntax_type_names.push(cstr.to_bytes_with_nul().to_vec().into_boxed_slice()); + } } } } + let c_syntax_type_names = syntax_type_names.iter().map( |s| { + s.as_ptr() + }).collect(); + let pattern_info = (0..query.pattern_count()) .map(|pattern_index| { let mut info = PatternInfo::default(); @@ -182,6 +194,8 @@ impl TagsConfiguration { Ok(TagsConfiguration { language, query, + syntax_type_names, + c_syntax_type_names, capture_map, doc_capture_index, name_capture_index, @@ -191,6 +205,13 @@ impl TagsConfiguration { pattern_info, }) } + + pub fn syntax_type_name(&self, id: u32) -> &str { + unsafe { + let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8).to_bytes(); + str::from_utf8(cstr).expect("syntax type name was not valid utf-8") + } + } } impl TagsContext { @@ -301,7 +322,7 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut syntax_type = SyntaxType::Function; + let mut syntax_type_id = 0; let mut is_definition = false; let mut docs_adjacent_node = None; @@ -320,7 +341,7 @@ where if let Some(named_capture) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - syntax_type = named_capture.syntax_type; + syntax_type_id = named_capture.syntax_type_id; is_definition = named_capture.is_definition; } } @@ -407,7 +428,7 @@ where range, name_range, docs, - syntax_type, + syntax_type_id, is_definition, }; } @@ -421,7 +442,7 @@ where range, name_range, docs, - syntax_type, + syntax_type_id, is_definition, }, mat.pattern_index, @@ -440,44 +461,6 @@ where } } -impl NamedCapture { - pub fn new(name: &String) -> Option { - let mut is_definition = false; - - let kind = if name.starts_with("definition.") { - is_definition = true; - name.trim_start_matches("definition.") - } else if name.starts_with("reference.") { - name.trim_start_matches("reference.") - } else { - name - }; - - let syntax_type = match kind.as_ref() { - "function" => {is_definition = true; 
SyntaxType::Function}, - "method" => {is_definition = true; SyntaxType::Method}, - "class" => {is_definition = true; SyntaxType::Class}, - "module" => {is_definition = true; SyntaxType::Module}, - "call" => SyntaxType::Call, - "type" => SyntaxType::Type, - "interface" => SyntaxType::Interface, - "implementation" => SyntaxType::Implementation, - _ => return None, - }; - - return Some(NamedCapture{ - syntax_type, - is_definition - }) - } -} - -impl fmt::Display for SyntaxType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - format!("{:?}", self).fmt(f) - } -} - impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 75724698f0b668b6511b8dcf4bf718733abfffb5 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:42:41 -0700 Subject: [PATCH 18/32] Fix up tests --- cli/src/tests/tags_test.rs | 60 +++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 02d06ff6..cc339e0a 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -2,8 +2,9 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use std::ffi::CStr; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, SyntaxType, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( @@ -97,14 +98,15 @@ fn test_tags_python() { .collect::, _>>() .unwrap(); + assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.syntax_type)) + .map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id))) .collect::>(), &[ - ("Customer", SyntaxType::Class), - ("age", SyntaxType::Function), - ("compute_age", SyntaxType::Call), + ("Customer", "class"), + ("age", "function"), + ("compute_age", "call"), ] ); @@ -150,12 
+152,12 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.syntax_type)) + .map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id))) .collect::>(), &[ - ("Customer", SyntaxType::Class), - ("getAge", SyntaxType::Method), - ("Agent", SyntaxType::Class) + ("Customer", "class"), + ("getAge", "method"), + ("Agent", "class") ] ); assert_eq!( @@ -204,18 +206,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.syntax_type, + tags_config.syntax_type_name(t.syntax_type_id), (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", SyntaxType::Method, (2, 0)), - ("bar", SyntaxType::Call, (7, 4)), - ("a", SyntaxType::Call, (7, 8)), - ("b", SyntaxType::Call, (7, 11)), - ("each", SyntaxType::Call, (9, 14)), - ("baz", SyntaxType::Call, (13, 8)), - ("b", SyntaxType::Call, (13, 15),), + ("foo", "method", (2, 0)), + ("bar", "call", (7, 4)), + ("a", "call", (7, 8)), + ("b", "call", (7, 11)), + ("each", "call", (9, 14)), + ("baz", "call", (13, 8)), + ("b", "call", (13, 15),), ] ); } @@ -253,6 +255,14 @@ fn test_tags_cancellation() { }); } +#[test] +fn test_invalid_cpature() { + let language = get_language("python"); + let e = TagsConfiguration::new(language, "(identifier) @method", "") + .expect_err("expected InvalidCapture error"); + assert_eq!(e, Error::InvalidCapture("method".to_string())); +} + #[test] fn test_tags_via_c_api() { allocations::record(|| { @@ -316,10 +326,18 @@ fn test_tags_via_c_api() { }) .unwrap(); + let syntax_types: Vec<&str> = unsafe { + let mut len: u32 = 0; + let ptr = c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); + slice::from_raw_parts(ptr, len as usize).iter().map(|i| { + CStr::from_ptr(*i).to_str().unwrap() + }).collect() + }; + assert_eq!( tags.iter() .map(|tag| ( - tag.syntax_type, + syntax_types[tag.syntax_type_id as usize], &source_code[tag.name_start_byte as 
usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], @@ -327,18 +345,18 @@ fn test_tags_via_c_api() { .collect::>(), &[ ( - c::TSSyntaxType::Function, + "function", "b", "function b() {", "one\ntwo\nthree" ), ( - c::TSSyntaxType::Class, + "class", "C", "class C extends D {", "four\nfive" ), - (c::TSSyntaxType::Call, "b", "b(a);", "") + ("call", "b", "b(a);", "") ] ); From b6ae67a6100a7c1fa6a249a2b4e0ff04378a41b5 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:43:10 -0700 Subject: [PATCH 19/32] Fix up CLI, use new syntax_type_name --- cli/src/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 3493f616..515f4c52 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -50,7 +50,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", ident, str::from_utf8(&source[tag.name_range]).unwrap_or(""), - tag.syntax_type, + &tags_config.syntax_type_name(tag.syntax_type_id), if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, From 17d26c0d5a5d2b836a0b5f77414c007572589b97 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:43:27 -0700 Subject: [PATCH 20/32] Improved errors --- cli/src/error.rs | 2 +- tags/src/lib.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 824bd92f..d583d1b9 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -83,7 +83,7 @@ impl<'a> From for Error { impl<'a> From for Error { fn from(error: tree_sitter_tags::Error) -> Self { - Error::new(format!("{:?}", error)) + Error::new(format!("{}", error)) } } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 3d5ce770..07fed3af 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -62,6 +62,15 @@ pub enum Error { 
InvalidCapture(String), } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), + _ => write!(f, "{:?}", self) + } + } +} + #[derive(Debug, Default)] struct PatternInfo { docs_adjacent_capture: Option, From ef15f4df24af34f685eefc630b2af69b1ee661b2 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 15:05:08 -0700 Subject: [PATCH 21/32] Dedupe items in syntax_type_names --- tags/src/lib.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 07fed3af..128a01cf 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,7 +5,7 @@ use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{fmt, mem, str}; -use std::ffi::CStr; +use std::ffi::{CStr, CString}; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -125,7 +125,6 @@ impl TagsConfiguration { let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; - let mut syntax_type_id = 0; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, @@ -144,12 +143,15 @@ impl TagsConfiguration { name.trim_start_matches("reference.") } else { return Err(Error::InvalidCapture(name.to_string())) - }.to_string()+"\0"; + }; - capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); - syntax_type_id+=1; - if let Ok(cstr) = CStr::from_bytes_with_nul(kind.as_bytes()) { - syntax_type_names.push(cstr.to_bytes_with_nul().to_vec().into_boxed_slice()); + if let Ok(cstr) = CString::new(kind) { + let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice(); + let syntax_type_id = 
syntax_type_names.iter().position(|n| { n == &c_kind }).unwrap_or_else(|| { + syntax_type_names.push(c_kind); + syntax_type_names.len() - 1 + }) as u32; + capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); } } } From f166947abb3fa834463dfb21b0044d30b0617795 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 15:05:27 -0700 Subject: [PATCH 22/32] Test updates, definition/reference prefix is now required --- cli/src/tests/tags_test.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index cc339e0a..540e2b01 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -10,65 +10,65 @@ const PYTHON_TAG_QUERY: &'static str = r#" ( (function_definition name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function + body: (block . (expression_statement (string) @doc))) @definition.function (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (function_definition - name: (identifier) @name) @function + name: (identifier) @name) @definition.function ( (class_definition name: (identifier) @name body: (block - . (expression_statement (string) @doc))) @class + . (expression_statement (string) @doc))) @definition.class (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (class_definition - name: (identifier) @name) @class + name: (identifier) @name) @definition.class (call - function: (identifier) @name) @call + function: (identifier) @name) @reference.call "#; const JS_TAG_QUERY: &'static str = r#" ( (comment)* @doc . (class_declaration - name: (identifier) @name) @class - (#select-adjacent! @doc @class) + name: (identifier) @name) @definition.class + (#select-adjacent! @doc @definition.class) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) ( (comment)* @doc . (method_definition - name: (property_identifier) @name) @method - (#select-adjacent! 
@doc @method) + name: (property_identifier) @name) @definition.method + (#select-adjacent! @doc @definition.method) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) ( (comment)* @doc . (function_declaration - name: (identifier) @name) @function - (#select-adjacent! @doc @function) + name: (identifier) @name) @definition.function + (#select-adjacent! @doc @definition.function) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) (call_expression - function: (identifier) @name) @call + function: (identifier) @name) @reference.call "#; const RUBY_TAG_QUERY: &'static str = r#" (method - name: (identifier) @name) @method + name: (identifier) @name) @definition.method (method_call - method: (identifier) @name) @call + method: (identifier) @name) @reference.call -((identifier) @name @call +((identifier) @name @reference.call (#is-not? local)) "#; @@ -256,7 +256,7 @@ fn test_tags_cancellation() { } #[test] -fn test_invalid_cpature() { +fn test_invalid_capture() { let language = get_language("python"); let e = TagsConfiguration::new(language, "(identifier) @method", "") .expect_err("expected InvalidCapture error"); From d9d3da994218339e525925b6cfda81247a22c001 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 16:04:05 -0700 Subject: [PATCH 23/32] Fill out rest of c errors --- tags/include/tree_sitter/tags.h | 1 + tags/src/c_lib.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index f6113a0f..58f5bbd9 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -16,6 +16,7 @@ typedef enum { TSTagsInvalidUtf8, TSTagsInvalidRegex, TSTagsInvalidQuery, + TSTagsInvalidCapture, } TSTagsError; typedef struct { diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 6dc48195..77f8aae5 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -16,6 +16,7 @@ pub enum TSTagsError { InvalidUtf8, InvalidRegex, InvalidQuery, + InvalidCapture, Unknown, } @@ -93,7 
+94,9 @@ pub extern "C" fn ts_tagger_add_language( } Err(Error::Query(_)) => TSTagsError::InvalidQuery, Err(Error::Regex(_)) => TSTagsError::InvalidRegex, - Err(_) => TSTagsError::Unknown, + Err(Error::Cancelled) => TSTagsError::Timeout, + Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage, + Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture, } } From d614c14c2cfc5911674f233ba7073c3dc3a90fdd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 8 Jul 2020 12:36:59 -0700 Subject: [PATCH 24/32] tags: Make spans refer to name, not entire def/ref Co-authored-by: Tim Clem Co-authored-by: Beka Valentine --- cli/src/tests/tags_test.rs | 23 ++++++++++++++++++----- tags/src/lib.rs | 12 +++++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index fad8ebd8..f3df4b53 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -2,6 +2,7 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use tree_sitter::Point; use tree_sitter_tags::c_lib as c; use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; @@ -150,12 +151,24 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.span.clone(), t.kind)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ( + "Customer", + Point::new(5, 10)..Point::new(5, 18), + TagKind::Class + ), + ( + "getAge", + Point::new(9, 8)..Point::new(9, 14), + TagKind::Method + ), + ( + "Agent", + Point::new(15, 10)..Point::new(15, 15), + TagKind::Class + ) ] ); assert_eq!( @@ -209,7 +222,7 @@ fn test_tags_ruby() { )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), + ("foo", TagKind::Method, (2, 4)), ("bar", TagKind::Call, (7, 4)), ("a", TagKind::Call, (7, 8)), 
("b", TagKind::Call, (7, 11)), diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8d1853bb..613e56ac 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -300,7 +300,7 @@ where continue; } - let mut name_range = None; + let mut name_node = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; let mut kind = TagKind::Call; @@ -314,7 +314,7 @@ where } if index == self.config.name_capture_index { - name_range = Some(capture.node.byte_range()); + name_node = Some(capture.node); } else if index == self.config.doc_capture_index { doc_nodes.push(capture.node); } else if index == self.config.call_capture_index { @@ -335,7 +335,9 @@ where } } - if let (Some(tag_node), Some(name_range)) = (tag_node, name_range) { + if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { + let name_range = name_node.byte_range(); + if pattern_info.name_must_be_non_local { let mut is_local = false; for scope in self.scopes.iter().rev() { @@ -413,7 +415,7 @@ where *pattern_index = mat.pattern_index; *tag = Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: tag_node.start_position()..tag_node.end_position(), + span: name_node.start_position()..name_node.end_position(), kind, range, name_range, @@ -426,7 +428,7 @@ where ( Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: tag_node.start_position()..tag_node.end_position(), + span: name_node.start_position()..name_node.end_position(), kind, range, name_range, From 255cf0a9cfe58654a40fd166dcbc3a0849073a22 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 8 Jul 2020 15:23:21 -0700 Subject: [PATCH 25/32] tags: Add utf16 column ranges to tags Also, ensure that line ranges contain only valid UTF8. 
Co-authored-by: Tim Clem Co-authored-by: Beka Valentine --- cli/src/tests/tags_test.rs | 42 +++++++--- tags/src/lib.rs | 159 +++++++++++++++++++++++++++---------- 2 files changed, 148 insertions(+), 53 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index f3df4b53..c81f6966 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -8,21 +8,21 @@ use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( - (function_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (function_definition name: (identifier) @name) @function ( - (class_definition - name: (identifier) @name - body: (block - . (expression_statement (string) @doc))) @class - (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @class + (#strip! 
@doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (class_definition @@ -30,6 +30,10 @@ const PYTHON_TAG_QUERY: &'static str = r#" (call function: (identifier) @name) @call + +(call + function: (attribute + attribute: (identifier) @name)) @call "#; const JS_TAG_QUERY: &'static str = r#" @@ -179,6 +183,26 @@ fn test_tags_javascript() { assert_eq!(tags[2].docs, None); } +#[test] +fn test_tags_columns_measured_in_utf16_code_units() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes(); + + let tag = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .next() + .unwrap() + .unwrap(); + + assert_eq!(substr(source, &tag.name_range), "hello_α_ω"); + assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); + assert_eq!(tag.utf16_column_range, 9..18); +} + #[test] fn test_tags_ruby() { let language = get_language("ruby"); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 613e56ac..a240666f 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -1,10 +1,10 @@ pub mod c_lib; -use memchr::{memchr, memrchr}; +use memchr::memchr; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{fmt, mem, str}; +use std::{char, fmt, mem, str}; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; @@ -43,6 +43,7 @@ pub struct Tag { pub name_range: Range, pub line_range: Range, pub span: Range, + pub utf16_column_range: Range, pub docs: Option, } @@ -404,39 +405,32 @@ where // Only create one tag per node. The tag queue is sorted by node position // to allow for fast lookup. 
let range = tag_node.byte_range(); - match self - .tag_queue - .binary_search_by_key(&(name_range.end, name_range.start), |(tag, _)| { - (tag.name_range.end, tag.name_range.start) - }) { + let span = name_node.start_position()..name_node.end_position(); + let utf16_column_range = + get_utf16_column_range(self.source, &name_range, &span); + let line_range = + line_range(self.source, name_range.start, span.start, MAX_LINE_LEN); + let tag = Tag { + line_range, + span, + utf16_column_range, + kind, + range, + name_range, + docs, + }; + match self.tag_queue.binary_search_by_key( + &(tag.name_range.end, tag.name_range.start), + |(tag, _)| (tag.name_range.end, tag.name_range.start), + ) { Ok(i) => { - let (tag, pattern_index) = &mut self.tag_queue[i]; + let (existing_tag, pattern_index) = &mut self.tag_queue[i]; if *pattern_index > mat.pattern_index { *pattern_index = mat.pattern_index; - *tag = Tag { - line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: name_node.start_position()..name_node.end_position(), - kind, - range, - name_range, - docs, - }; + *existing_tag = tag; } } - Err(i) => self.tag_queue.insert( - i, - ( - Tag { - line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: name_node.start_position()..name_node.end_position(), - kind, - range, - name_range, - docs, - }, - mat.pattern_index, - ), - ), + Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)), } } } @@ -475,11 +469,92 @@ impl From for Error { } } -fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { - let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); - let max_line_len = max_line_len.min(text.len() - start); - let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - start..end +pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + 
+impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + +fn line_range( + text: &[u8], + start_byte: usize, + start_point: Point, + max_line_len: usize, +) -> Range { + let line_start_byte = start_byte - start_point.column; + let max_line_len = max_line_len.min(text.len() - line_start_byte); + let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)]; + let len = if let Some(len) = memchr(b'\n', text_after_line_start) { + len + } else { + match str::from_utf8(text_after_line_start) { + Ok(s) => s.len(), + Err(e) => e.valid_up_to(), + } + }; + line_start_byte..(line_start_byte + len) +} + +fn get_utf16_column_range( + text: &[u8], + byte_range: &Range, + point_range: &Range, +) -> Range { + let start = byte_range.start - point_range.start.column; + let preceding_text_on_line = &text[start..byte_range.start]; + let start_col = utf16_len(preceding_text_on_line); + start_col..(start_col + utf16_len(&text[byte_range.clone()])) +} + +fn utf16_len(bytes: &[u8]) -> usize { + LossyUtf8::new(bytes) + .flat_map(|chunk| chunk.chars().map(char::len_utf16)) + .sum() } #[cfg(test)] @@ -488,14 +563,10 @@ mod tests { #[test] fn test_get_line() { - let text = b"abc\ndefg\nhijkl"; - assert_eq!(line_range(text, 0, 10), 0..3); - assert_eq!(line_range(text, 1, 10), 
0..3); - assert_eq!(line_range(text, 2, 10), 0..3); - assert_eq!(line_range(text, 3, 10), 0..3); - assert_eq!(line_range(text, 1, 2), 0..2); - assert_eq!(line_range(text, 4, 10), 4..8); - assert_eq!(line_range(text, 5, 10), 4..8); - assert_eq!(line_range(text, 11, 10), 9..14); + let text = "abc\ndefg❤hij\nklmno".as_bytes(); + assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14); + assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8); + assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20); + assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19); } } From b52f28d6d5d740a85e539cde221b6742106f488f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 11:28:07 -0700 Subject: [PATCH 26/32] Allow measuring time for tags subcommand --- cli/src/main.rs | 20 ++++++++++++--- cli/src/tags.rs | 67 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 757c70eb..d7a5e7b1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -97,6 +97,8 @@ fn run() -> error::Result<()> { .value_name("json|protobuf") .help("Determine output format (default: json)"), ) + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg( Arg::with_name("inputs") @@ -149,8 +151,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser") - .arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") @@ -268,7 +276,13 @@ fn run() -> 
error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; let paths = collect_paths(matches.values_of("inputs").unwrap())?; - tags::generate_tags(&loader, matches.value_of("scope"), &paths)?; + tags::generate_tags( + &loader, + matches.value_of("scope"), + &paths, + matches.is_present("quiet"), + matches.is_present("time"), + )?; } else if let Some(matches) = matches.subcommand_matches("highlight") { loader.configure_highlights(&config.theme.highlight_names); loader.find_all_languages(&config.parser_directories)?; diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 515f4c52..5ea00f39 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -3,10 +3,17 @@ use super::util; use crate::error::{Error, Result}; use std::io::{self, Write}; use std::path::Path; +use std::time::Instant; use std::{fs, str}; use tree_sitter_tags::TagsContext; -pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> { +pub fn generate_tags( + loader: &Loader, + scope: Option<&str>, + paths: &[String], + quiet: bool, + time: bool, +) -> Result<()> { let mut lang = None; if let Some(scope) = scope { lang = loader.language_configuration_for_scope(scope)?; @@ -34,36 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? { - let ident = if paths.len() > 1 { - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; - "\t" + let indent; + if paths.len() > 1 { + if !quiet { + writeln!(&mut stdout, "{}", path.to_string_lossy())?; + } + indent = "\t" } else { - "" + indent = ""; }; let source = fs::read(path)?; + let t0 = Instant::now(); for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? 
{ let tag = tag?; - write!( - &mut stdout, - "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", - ident, - str::from_utf8(&source[tag.name_range]).unwrap_or(""), - &tags_config.syntax_type_name(tag.syntax_type_id), - if tag.is_definition { "def" } else { "ref" }, - tag.span.start, - tag.span.end, - str::from_utf8(&source[tag.line_range]).unwrap_or(""), - )?; - if let Some(docs) = tag.docs { - if docs.len() > 120 { - write!(&mut stdout, "\t{:?}...", &docs[0..120])?; - } else { - write!(&mut stdout, "\t{:?}", &docs)?; + if !quiet { + write!( + &mut stdout, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + indent, + str::from_utf8(&source[tag.name_range]).unwrap_or(""), + &tags_config.syntax_type_name(tag.syntax_type_id), + if tag.is_definition { "def" } else { "ref" }, + tag.span.start, + tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), + )?; + if let Some(docs) = tag.docs { + if docs.len() > 120 { + write!(&mut stdout, "\t{:?}...", &docs[0..120])?; + } else { + write!(&mut stdout, "\t{:?}", &docs)?; + } } + writeln!(&mut stdout, "")?; } - writeln!(&mut stdout, "")?; + } + + if time { + writeln!( + &mut stdout, + "{}time: {}ms", + indent, + t0.elapsed().as_millis(), + )?; } } else { eprintln!("No tags config found for path {:?}", path); From 1ecfc2548f1dfe0aa2ec34fb174555a27f37dde0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 11:30:30 -0700 Subject: [PATCH 27/32] tags: Move impls below type definitions --- tags/src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 7d58d99b..790b866a 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -63,15 +63,6 @@ pub enum Error { InvalidCapture(String), } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. 
Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), - _ => write!(f, "{:?}", self) - } - } -} - #[derive(Debug, Default)] struct PatternInfo { docs_adjacent_capture: Option, @@ -475,6 +466,15 @@ where } } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), + _ => write!(f, "{:?}", self) + } + } +} + impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 52360b103d0b293c54e83a188d7f2f1b9a7dc5d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:07:57 -0700 Subject: [PATCH 28/32] tags: Fix comment position --- tags/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 790b866a..41b4557a 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -423,8 +423,6 @@ where } } - // Only create one tag per node. The tag queue is sorted by node position - // to allow for fast lookup. let range = tag_node.byte_range(); let span = name_node.start_position()..name_node.end_position(); let utf16_column_range = @@ -441,6 +439,9 @@ where is_definition, syntax_type_id, }; + + // Only create one tag per node. The tag queue is sorted by node position + // to allow for fast lookup. 
match self.tag_queue.binary_search_by_key( &(tag.name_range.end, tag.name_range.start), |(tag, _)| (tag.name_range.end, tag.name_range.start), From 0f805603104cab4d59c9f02154720fd000b22305 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:13:12 -0700 Subject: [PATCH 29/32] tags: Reuse work when computing utf16 columns, line ranges --- tags/src/lib.rs | 64 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 41b4557a..ca5699ca 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -91,6 +91,7 @@ where matches: I, _tree: Tree, source: &'a [u8], + prev_line_info: Option, config: &'a TagsConfiguration, cancellation_flag: Option<&'a AtomicUsize>, iter_count: usize, @@ -98,6 +99,13 @@ where scopes: Vec>, } +struct LineInfo { + utf8_position: Point, + utf8_byte: usize, + utf16_column: usize, + line_range: Range, +} + impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -260,6 +268,7 @@ impl TagsContext { source, config, cancellation_flag, + prev_line_info: None, tag_queue: Vec::new(), iter_count: 0, scopes: vec![LocalScope { @@ -425,10 +434,46 @@ where let range = tag_node.byte_range(); let span = name_node.start_position()..name_node.end_position(); - let utf16_column_range = - get_utf16_column_range(self.source, &name_range, &span); - let line_range = - line_range(self.source, name_range.start, span.start, MAX_LINE_LEN); + + // Compute tag properties that depend on the text of the containing line. If the + // previous tag occurred on the same line, then reuse results from the previous tag. 
+ let line_range; + let mut prev_utf16_column = 0; + let mut prev_utf8_byte = name_range.start - span.start.column; + let line_info = self.prev_line_info.as_ref().and_then(|info| { + if info.utf8_position.row == span.start.row { + Some(info) + } else { + None + } + }); + if let Some(line_info) = line_info { + line_range = line_info.line_range.clone(); + if line_info.utf8_position.column <= span.start.column { + prev_utf8_byte = line_info.utf8_byte; + prev_utf16_column = line_info.utf16_column; + } + } else { + line_range = self::line_range( + self.source, + name_range.start, + span.start, + MAX_LINE_LEN, + ); + } + + let utf16_start_column = prev_utf16_column + + utf16_len(&self.source[prev_utf8_byte..name_range.start]); + let utf16_end_column = + utf16_start_column + utf16_len(&self.source[name_range.clone()]); + let utf16_column_range = utf16_start_column..utf16_end_column; + + self.prev_line_info = Some(LineInfo { + utf8_position: span.end, + utf8_byte: name_range.end, + utf16_column: utf16_end_column, + line_range: line_range.clone(), + }); let tag = Tag { line_range, span, @@ -570,17 +615,6 @@ fn line_range( line_start_byte..line_end_byte } -fn get_utf16_column_range( - text: &[u8], - byte_range: &Range, - point_range: &Range, -) -> Range { - let line_start_byte = byte_range.start - point_range.start.column; - let preceding_text_on_line = &text[line_start_byte..byte_range.start]; - let start_col = utf16_len(preceding_text_on_line); - start_col..(start_col + utf16_len(&text[byte_range.clone()])) -} - fn utf16_len(bytes: &[u8]) -> usize { LossyUtf8::new(bytes) .flat_map(|chunk| chunk.chars().map(char::len_utf16)) From 9e38fd9f5c32b58919c1cb422f06c8021da98207 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:32:40 -0700 Subject: [PATCH 30/32] Add todo comment for LossyUtf8 iterator --- tags/src/lib.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index ca5699ca..dcbb9984 
100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -106,6 +106,11 @@ struct LineInfo { line_range: Range, } +struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -533,13 +538,11 @@ impl From for Error { } } -pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - +// TODO: Remove this struct at some point. If `core::str::lossy::Utf8Lossy` +// is ever stabilized, we should use that. Otherwise, this struct could be moved +// into some module that's shared between `tree-sitter-tags` and `tree-sitter-highlight`. impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { + fn new(bytes: &'a [u8]) -> Self { LossyUtf8 { bytes, in_replacement: false, From 6cee04350f909c6611258ccaee06446e08218f0c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 13:39:47 -0700 Subject: [PATCH 31/32] tags: Expose utf16 column range to C API --- tags/include/tree_sitter/tags.h | 2 ++ tags/src/c_lib.rs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 58f5bbd9..f2b17075 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -28,6 +28,8 @@ typedef struct { uint32_t line_end_byte; TSPoint start_point; TSPoint end_point; + uint32_t utf16_start_column; + uint32_t utf16_end_column; uint32_t docs_start_byte; uint32_t docs_end_byte; uint32_t syntax_type_id; diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 77f8aae5..07e1e19a 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -36,6 +36,8 @@ pub struct TSTag { pub line_end_byte: u32, pub start_point: TSPoint, pub end_point: TSPoint, + pub utf16_start_colum: u32, + pub utf16_end_colum: u32, pub docs_start_byte: u32, pub docs_end_byte: u32, pub syntax_type_id: u32, @@
-161,6 +163,8 @@ pub extern "C" fn ts_tagger_tag( row: tag.span.end.row as u32, column: tag.span.end.column as u32, }, + utf16_start_colum: tag.utf16_column_range.start as u32, + utf16_end_colum: tag.utf16_column_range.end as u32, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, syntax_type_id: tag.syntax_type_id, @@ -225,7 +229,7 @@ pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( *len = 0; if let Some(config) = tagger.languages.get(scope_name) { *len = config.c_syntax_type_names.len() as u32; - return config.c_syntax_type_names.as_ptr() as *const *const i8 + return config.c_syntax_type_names.as_ptr() as *const *const i8; } std::ptr::null() } From e4e785b567eb975c5fa6900b08728aac856bdaad Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 10 Jul 2020 13:47:56 -0700 Subject: [PATCH 32/32] Remove unused flags from tags CLI command --- cli/src/main.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index d7a5e7b1..713bf28f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -90,13 +90,6 @@ fn run() -> error::Result<()> { ) .subcommand( SubCommand::with_name("tags") - .arg( - Arg::with_name("format") - .short("f") - .long("format") - .value_name("json|protobuf") - .help("Determine output format (default: json)"), - ) .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) @@ -106,12 +99,6 @@ fn run() -> error::Result<()> { .index(1) .required(true) .multiple(true), - ) - .arg( - Arg::with_name("v") - .short("v") - .multiple(true) - .help("Sets the level of verbosity"), ), ) .subcommand(