Merge pull request #648 from tree-sitter/tagging-improvements

Tagging improvements
This commit is contained in:
Max Brunsfeld 2020-07-10 13:48:23 -07:00 committed by GitHub
commit 0c2dc4c1e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 464 additions and 229 deletions

View file

@ -83,7 +83,7 @@ impl<'a> From<tree_sitter_highlight::Error> for Error {
impl<'a> From<tree_sitter_tags::Error> for Error {
fn from(error: tree_sitter_tags::Error) -> Self {
Error::new(format!("{:?}", error))
Error::new(format!("{}", error))
}
}

View file

@ -90,13 +90,8 @@ fn run() -> error::Result<()> {
)
.subcommand(
SubCommand::with_name("tags")
.arg(
Arg::with_name("format")
.short("f")
.long("format")
.value_name("json|protobuf")
.help("Determine output format (default: json)"),
)
.arg(Arg::with_name("quiet").long("quiet").short("q"))
// The timing flag needs its own long name: it was previously declared with
// `.long("quiet")`, which duplicated the long name of the flag above.
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(
Arg::with_name("inputs")
@ -104,12 +99,6 @@ fn run() -> error::Result<()> {
.index(1)
.required(true)
.multiple(true),
)
.arg(
Arg::with_name("v")
.short("v")
.multiple(true)
.help("Sets the level of verbosity"),
),
)
.subcommand(
@ -149,8 +138,14 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("path").index(1).multiple(true)),
)
.subcommand(
SubCommand::with_name("web-ui").about("Test a parser interactively in the browser")
.arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")),
SubCommand::with_name("web-ui")
.about("Test a parser interactively in the browser")
.arg(
Arg::with_name("quiet")
.long("quiet")
.short("q")
.help("open in default browser"),
),
)
.subcommand(
SubCommand::with_name("dump-languages")
@ -268,7 +263,13 @@ fn run() -> error::Result<()> {
} else if let Some(matches) = matches.subcommand_matches("tags") {
loader.find_all_languages(&config.parser_directories)?;
let paths = collect_paths(matches.values_of("inputs").unwrap())?;
tags::generate_tags(&loader, matches.value_of("scope"), &paths)?;
tags::generate_tags(
&loader,
matches.value_of("scope"),
&paths,
matches.is_present("quiet"),
matches.is_present("time"),
)?;
} else if let Some(matches) = matches.subcommand_matches("highlight") {
loader.configure_highlights(&config.theme.highlight_names);
loader.find_all_languages(&config.parser_directories)?;

View file

@ -3,10 +3,17 @@ use super::util;
use crate::error::{Error, Result};
use std::io::{self, Write};
use std::path::Path;
use std::time::Instant;
use std::{fs, str};
use tree_sitter_tags::TagsContext;
pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> {
pub fn generate_tags(
loader: &Loader,
scope: Option<&str>,
paths: &[String],
quiet: bool,
time: bool,
) -> Result<()> {
let mut lang = None;
if let Some(scope) = scope {
lang = loader.language_configuration_for_scope(scope)?;
@ -34,28 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) ->
};
if let Some(tags_config) = language_config.tags_config(language)? {
let path_str = format!("{:?}", path);
writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?;
let indent;
if paths.len() > 1 {
if !quiet {
writeln!(&mut stdout, "{}", path.to_string_lossy())?;
}
indent = "\t"
} else {
indent = "";
};
let source = fs::read(path)?;
let t0 = Instant::now();
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? {
let tag = tag?;
write!(
&mut stdout,
" {:<8} {:<40}\t{:>9}-{:<9}",
tag.kind,
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
tag.span.start,
tag.span.end,
)?;
if let Some(docs) = tag.docs {
if docs.len() > 120 {
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
} else {
write!(&mut stdout, "\t{:?}", &docs)?;
if !quiet {
write!(
&mut stdout,
"{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
indent,
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
&tags_config.syntax_type_name(tag.syntax_type_id),
if tag.is_definition { "def" } else { "ref" },
tag.span.start,
tag.span.end,
str::from_utf8(&source[tag.line_range]).unwrap_or(""),
)?;
if let Some(docs) = tag.docs {
    if docs.len() > 120 {
        // `len()` counts bytes, so byte 120 may fall inside a multi-byte
        // character. Back up to the nearest char boundary so the slice
        // cannot panic on non-ASCII documentation. Offset 0 is always a
        // boundary, so the loop terminates.
        let mut end = 120;
        while !docs.is_char_boundary(end) {
            end -= 1;
        }
        write!(&mut stdout, "\t{:?}...", &docs[..end])?;
    } else {
        write!(&mut stdout, "\t{:?}", &docs)?;
    }
}
writeln!(&mut stdout, "")?;
}
writeln!(&mut stdout, "")?;
}
if time {
writeln!(
&mut stdout,
"{}time: {}ms",
indent,
t0.elapsed().as_millis(),
)?;
}
} else {
eprintln!("No tags config found for path {:?}", path);

View file

@ -1,73 +1,79 @@
use super::helpers::allocations;
use super::helpers::fixtures::{get_language, get_language_queries_path};
use std::ffi::CStr;
use std::ffi::CString;
use std::{fs, ptr, slice, str};
use tree_sitter::Point;
use tree_sitter_tags::c_lib as c;
use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext};
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @function
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @class
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @class
(#select-adjacent! @doc @class)
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @method
(#select-adjacent! @doc @method)
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @function
(#select-adjacent! @doc @function)
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @call
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
(method
name: (identifier) @name) @method
name: (identifier) @name) @definition.method
(method_call
method: (identifier) @name) @call
method: (identifier) @name) @reference.call
((identifier) @name @call
((identifier) @name @reference.call
(#is-not? local))
"#;
@ -99,20 +105,20 @@ fn test_tags_python() {
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), t.kind))
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", TagKind::Class),
("age", TagKind::Function),
("compute_age", TagKind::Call),
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), " class Customer:");
assert_eq!(
substr(source, &tags[1].line_range),
" def age(self):"
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
@ -150,12 +156,16 @@ fn test_tags_javascript() {
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), t.kind))
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", TagKind::Class),
("getAge", TagKind::Method),
("Agent", TagKind::Class)
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
@ -166,6 +176,26 @@ fn test_tags_javascript() {
assert_eq!(tags[2].docs, None);
}
// Generates tags for a one-line Python call whose text contains multi-byte
// characters, then checks the first tag's name, its `span`, and its
// `utf16_column_range`.
#[test]
fn test_tags_columns_measured_in_utf16_code_units() {
    let language = get_language("python");
    let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
    let mut tag_context = TagsContext::new();

    let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();

    // Only the first tag produced for this source is inspected.
    let tag = tag_context
        .generate_tags(&tags_config, source, None)
        .unwrap()
        .next()
        .unwrap()
        .unwrap();

    // The tagged name is the whole identifier `hello_α_ω`: it is 11 UTF-8
    // bytes and 9 UTF-16 code units long, which matches the widths of the
    // two ranges asserted below (32 - 21 and 18 - 9). The previous expected
    // value, "hello_α", was inconsistent with both.
    assert_eq!(substr(source, &tag.name_range), "hello_α_ω");
    assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
    assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
fn test_tags_ruby() {
let language = get_language("ruby");
@ -204,18 +234,18 @@ fn test_tags_ruby() {
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
t.kind,
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo", TagKind::Method, (2, 0)),
("bar", TagKind::Call, (7, 4)),
("a", TagKind::Call, (7, 8)),
("b", TagKind::Call, (7, 11)),
("each", TagKind::Call, (9, 14)),
("baz", TagKind::Call, (13, 8)),
("b", TagKind::Call, (13, 15),),
("foo", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
@ -253,6 +283,14 @@ fn test_tags_cancellation() {
});
}
// Building a tags configuration from a query whose only capture is `@method`
// is expected to fail with `Error::InvalidCapture` carrying that capture name.
#[test]
fn test_invalid_capture() {
    let python = get_language("python");
    let outcome = TagsConfiguration::new(python, "(identifier) @method", "");
    let err = outcome.expect_err("expected InvalidCapture error");
    let expected = Error::InvalidCapture("method".to_string());
    assert_eq!(err, expected);
}
#[test]
fn test_tags_via_c_api() {
allocations::record(|| {
@ -316,29 +354,29 @@ fn test_tags_via_c_api() {
})
.unwrap();
let syntax_types: Vec<&str> = unsafe {
let mut len: u32 = 0;
let ptr =
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect()
};
assert_eq!(
tags.iter()
.map(|tag| (
tag.kind,
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
(
c::TSTagKind::Function,
"b",
"function b() {",
"one\ntwo\nthree"
),
(
c::TSTagKind::Class,
"C",
"class C extends D {",
"four\nfive"
),
(c::TSTagKind::Call, "b", "b(a);", "")
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);