From b5f2ed83fe60ef2d704ae59bbc2c3ba4e89757c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 13 Mar 2020 13:02:56 -0700 Subject: [PATCH] tags: Implement select-adjacent! predicate --- cli/src/tests/tags_test.rs | 43 ++++---- tags/src/lib.rs | 201 +++++++++++++++++++++---------------- 2 files changed, 140 insertions(+), 104 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index f7b5f6ee..65d2096f 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -8,19 +8,19 @@ fn test_tags_python() { language, r#" ((function_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (set! strip @doc "(^['\"\\s]*)|(['\"\\s]*$)")) + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @function + (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) (function_definition - name: (identifier) @name) @function + name: (identifier) @name) @function ((class_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @class - (set! strip @doc "(^['\"\\s]*)|(['\"\\s]*$)")) + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @class + (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) (class_definition - name: (identifier) @name) @class + name: (identifier) @name) @class (call - function: (identifier) @name) @call + function: (identifier) @name) @call "#, "", ) @@ -67,17 +67,19 @@ fn test_tags_javascript() { language, r#" ((* - (comment)+ @doc - . - (class_declaration - name: (identifier) @name) @class) - (set! strip @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + (comment)+ @doc + . + (class_declaration + name: (identifier) @name) @class) + (select-adjacent! @doc @class) + (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) ((* - (comment)+ @doc - . - (method_definition - name: (property_identifier) @name) @method) - (set! strip @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + (comment)+ @doc + . + (method_definition + name: (property_identifier) @name) @method) + (select-adjacent! @doc @method) + (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) "#, "", ) @@ -88,6 +90,8 @@ fn test_tags_javascript() { .generate_tags( &tags_config, br#" + // hi + // Data about a customer. // bla bla bla class Customer { @@ -95,7 +99,6 @@ fn test_tags_javascript() { * Get the customer's age */ getAge() { - } } "#, diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 4b5d2a7e..19b7be84 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -1,7 +1,9 @@ use regex::Regex; use serde::{Serialize, Serializer}; use std::{mem, ops, str}; -use tree_sitter::{Language, Node, Parser, Query, QueryCursor, QueryError, Tree}; +use tree_sitter::{ + Language, Node, Parser, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, +}; /// Contains the data neeeded to compute tags for code written in a /// particular language. @@ -16,7 +18,7 @@ pub struct TagsConfiguration { method_capture_index: Option, module_capture_index: Option, name_capture_index: Option, - doc_strip_regexes: Vec>, + pattern_info: Vec, } pub struct TagsContext { @@ -24,6 +26,12 @@ pub struct TagsContext { cursor: QueryCursor, } +#[derive(Default)] +struct PatternInfo { + docs_adjacent_capture: Option, + doc_strip_regex: Option, +} + struct TagsIter<'a, I> where I: Iterator>, @@ -105,20 +113,28 @@ impl TagsConfiguration { *index = Some(i as u32); } - let doc_strip_regexes = (0..query.pattern_count()) + let pattern_info = (0..query.pattern_count()) .map(|pattern_index| { - let properties = query.property_settings(pattern_index); - for property in properties { - if property.key.as_ref() == "strip" - && property.capture_id.map(|id| id as u32) == doc_capture_index - { - if let Some(value) = &property.value { - let regex = Regex::new(value.as_ref())?; - return Ok(Some(regex)); + let mut info = PatternInfo::default(); + if let Some(doc_capture_index) = doc_capture_index { + for predicate in query.general_predicates(pattern_index) { + if predicate.args.get(0) + == Some(&QueryPredicateArg::Capture(doc_capture_index)) + { + match (predicate.operator.as_ref(), predicate.args.get(1)) { + ("select-adjacent!", Some(QueryPredicateArg::Capture(index))) => { + info.docs_adjacent_capture = Some(*index); + } + ("strip!", Some(QueryPredicateArg::String(pattern))) => { + let regex = Regex::new(pattern.as_ref())?; + info.doc_strip_regex = Some(regex); + } + _ => {} + } } } } - return Ok(None); + return Ok(info); }) .collect::, Error>>()?; @@ -133,7 +149,7 @@ impl TagsConfiguration { doc_capture_index, call_capture_index, name_capture_index, - doc_strip_regexes, + pattern_info, }) } } @@ -199,26 +215,68 @@ where // If there is another match, then compute its tag and add it to the // tag queue. if let Some(mat) = self.matches.next() { - let mut docs = None; - let mut call_node = None; - let mut class_node = None; - let mut function_node = None; - let mut method_node = None; - let mut module_node = None; - let mut name_node = None; + let mut name = None; + let mut doc_nodes = Vec::new(); + let mut tag_node = None; + let mut tag_kind = TagKind::Call; + let mut docs_adjacent_node = None; for capture in mat.captures { let index = Some(capture.index); - let node = Some(capture.node); - if index == self.config.call_capture_index { - call_node = node; - } else if index == self.config.class_capture_index { - class_node = node; + + if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture { + docs_adjacent_node = Some(capture.node); + } + + if index == self.config.name_capture_index { + name = str::from_utf8(&self.source[Some(capture.node)?.byte_range()]).ok(); } else if index == self.config.doc_capture_index { - if let Ok(content) = str::from_utf8(&self.source[capture.node.byte_range()]) - { + doc_nodes.push(capture.node); + } else if index == self.config.call_capture_index { + tag_node = Some(capture.node); + tag_kind = TagKind::Call; + } else if index == self.config.class_capture_index { + tag_node = Some(capture.node); + tag_kind = TagKind::Class; + } else if index == self.config.function_capture_index { + tag_node = Some(capture.node); + tag_kind = TagKind::Function; + } else if index == self.config.method_capture_index { + tag_node = Some(capture.node); + tag_kind = TagKind::Method; + } else if index == self.config.module_capture_index { + tag_node = Some(capture.node); + tag_kind = TagKind::Module; + } + } + + if let (Some(tag_node), Some(name)) = (tag_node, name) { + // If needed, filter the doc nodes based on their ranges, selecting + // only the slice that are adjacent to some specified node. + let mut docs_start_index = 0; + if let (Some(docs_adjacent_node), false) = + (docs_adjacent_node, doc_nodes.is_empty()) + { + docs_start_index = doc_nodes.len(); + let mut start_row = docs_adjacent_node.start_position().row; + while docs_start_index > 0 { + let doc_node = &doc_nodes[docs_start_index - 1]; + let prev_doc_end_row = doc_node.end_position().row; + if prev_doc_end_row + 1 >= start_row { + docs_start_index -= 1; + start_row = doc_node.start_position().row; + } else { + break; + } + } + } + + // Generate a doc string from all of the doc nodes, applying any strip regexes. + let mut docs = None; + for doc_node in &doc_nodes[docs_start_index..] { + if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) { let content = if let Some(regex) = - &self.config.doc_strip_regexes[mat.pattern_index] + &self.config.pattern_info[mat.pattern_index].doc_strip_regex { regex.replace_all(content, "").to_string() } else { @@ -232,68 +290,43 @@ where } } } - } else if index == self.config.function_capture_index { - function_node = node; - } else if index == self.config.method_capture_index { - method_node = node; - } else if index == self.config.module_capture_index { - module_node = node; - } else if index == self.config.name_capture_index { - name_node = node; } - } - let source = &self.source; - let tag_from_node = |node: Node, kind: TagKind| -> Option { - let name = str::from_utf8(&source[name_node?.byte_range()]).ok()?; + let source = &self.source; + let tag_from_node = |node: Node, kind: TagKind| -> Option { + // Slice out the first line of the text corresponding to the node in question. + let mut line_range = node.byte_range(); + line_range.end = line_range.end.min(line_range.start + 180); + let line = str::from_utf8(&source[line_range]).ok()?.lines().next()?; + Some(Tag { + name, + line, + kind, + docs, + loc: loc_for_node(node), + }) + }; - // Slice out the first line of the text corresponding to the node in question. - let mut line_range = node.byte_range(); - line_range.end = line_range.end.min(line_range.start + 180); - let line = str::from_utf8(&source[line_range]).ok()?.lines().next()?; - - Some(Tag { - name, - line, - loc: loc_for_node(node), - kind: kind, - docs, - }) - }; - - for (tag_node, tag_kind) in [ - (call_node, TagKind::Call), - (class_node, TagKind::Class), - (function_node, TagKind::Function), - (method_node, TagKind::Method), - (module_node, TagKind::Module), - ] - .iter() - .cloned() - { - if let Some(found) = tag_node { - // Only create one tag per node. The tag queue is sorted by node position - // to allow for fast lookup. - match self.tag_queue.binary_search_by_key( - &(found.end_byte(), found.start_byte(), found.id()), - |(node, _, _)| (node.end_byte(), node.start_byte(), node.id()), - ) { - Ok(i) => { - let (_, old_idx, tag) = &mut self.tag_queue[i]; - if *old_idx > mat.pattern_index { - if let Some(new_tag) = tag_from_node(found, tag_kind) { - *tag = new_tag; - *old_idx = mat.pattern_index; - } - } - } - Err(i) => { - if let Some(tag) = tag_from_node(found, tag_kind) { - self.tag_queue.insert(i, (found, mat.pattern_index, tag)) + // Only create one tag per node. The tag queue is sorted by node position + // to allow for fast lookup. + match self.tag_queue.binary_search_by_key( + &(tag_node.end_byte(), tag_node.start_byte(), tag_node.id()), + |(node, _, _)| (node.end_byte(), node.start_byte(), node.id()), + ) { + Ok(i) => { + let (_, old_idx, tag) = &mut self.tag_queue[i]; + if *old_idx > mat.pattern_index { + if let Some(new_tag) = tag_from_node(tag_node, tag_kind) { + *tag = new_tag; + *old_idx = mat.pattern_index; } } } - break; + Err(i) => { + if let Some(tag) = tag_from_node(tag_node, tag_kind) { + self.tag_queue.insert(i, (tag_node, mat.pattern_index, tag)) + } + } } } }