Merge branch 'master' into query-pattern-is-definite

This commit is contained in:
Max Brunsfeld 2020-08-14 09:31:55 -07:00
commit 1ea29053e1
33 changed files with 2004 additions and 763 deletions

View file

@ -87,7 +87,7 @@ impl<'a> From<tree_sitter_highlight::Error> for Error {
impl<'a> From<tree_sitter_tags::Error> for Error {
fn from(error: tree_sitter_tags::Error) -> Self {
Error::new(format!("{:?}", error))
Error::new(format!("{}", error))
}
}

View file

@ -199,6 +199,9 @@ impl<'a> Minimizer<'a> {
right_state: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
return true;
}
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
if self.entries_conflict(

View file

@ -19,7 +19,7 @@ pub(crate) struct FieldInfo {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo {
pub fields: HashMap<String, FieldInfo>,
pub child_types: Vec<ChildType>,
pub children: FieldInfo,
pub children_without_fields: FieldInfo,
pub has_multi_step_production: bool,
}
@ -70,7 +70,7 @@ impl Default for FieldInfoJSON {
impl Default for ChildQuantity {
fn default() -> Self {
Self::zero()
Self::one()
}
}
@ -158,7 +158,7 @@ pub(crate) fn get_variable_info(
// Each variable's summary can depend on the summaries of other hidden variables,
// and variables can have mutually recursive structure. So we compute the summaries
// iteratively, in a loop that terminates only when more changes are possible.
// iteratively, in a loop that terminates only when no more changes are possible.
let mut did_change = true;
let mut all_initialized = false;
let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()];
@ -168,13 +168,14 @@ pub(crate) fn get_variable_info(
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
let mut variable_info = result[i].clone();
// Within a variable, consider each production separately. For each
// production, determine which children and fields can occur, and how many
// times they can occur.
for (production_index, production) in variable.productions.iter().enumerate() {
let mut field_quantities = HashMap::new();
let mut children_without_fields_quantity = ChildQuantity::zero();
let mut has_uninitialized_invisible_children = false;
// Examine each of the variable's productions. The variable's child types can be
// immediately combined across all productions, but the child quantities must be
// recorded separately for each production.
for production in &variable.productions {
let mut production_field_quantities = HashMap::new();
let mut production_children_quantity = ChildQuantity::zero();
let mut production_children_without_fields_quantity = ChildQuantity::zero();
let mut production_has_uninitialized_invisible_children = false;
if production.steps.len() > 1 {
variable_info.has_multi_step_production = true;
@ -190,111 +191,97 @@ pub(crate) fn get_variable_info(
ChildType::Normal(child_symbol)
};
// Record all of the types of direct children.
did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type);
let child_is_hidden = !child_type_is_visible(&child_type)
&& !syntax_grammar.supertype_symbols.contains(&child_symbol);
// Record all of the field names that occur.
// Maintain the set of all child types for this variable, and the quantity of
// visible children in this production.
did_change |=
extend_sorted(&mut variable_info.children.types, Some(&child_type));
if !child_is_hidden {
production_children_quantity.append(ChildQuantity::one());
}
// Maintain the set of child types associated with each field, and the quantity
// of children associated with each field in this production.
if let Some(field_name) = &step.field_name {
// Record how many times each field occurs in this production.
field_quantities
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo::default());
did_change |= extend_sorted(&mut field_info.types, Some(&child_type));
let production_field_quantity = production_field_quantities
.entry(field_name)
.or_insert(ChildQuantity::zero())
.append(ChildQuantity::one());
.or_insert(ChildQuantity::zero());
// Record the types of children for this field.
let field_info =
variable_info.fields.entry(field_name.clone()).or_insert({
let mut info = FieldInfo {
types: Vec::new(),
quantity: ChildQuantity::one(),
};
// If this field did *not* occur in an earlier production,
// then it is not required.
if production_index > 0 {
info.quantity.required = false;
}
info
});
did_change |= sorted_vec_insert(&mut field_info.types, &child_type);
}
// Record named children without fields.
else if child_type_is_named(&child_type) {
// Record how many named children without fields occur in this production.
children_without_fields_quantity.append(ChildQuantity::one());
// Record the types of all of the named children without fields.
let children_info = &mut variable_info.children_without_fields;
if children_info.types.is_empty() {
children_info.quantity = ChildQuantity::one();
// Inherit the types and quantities of hidden children associated with fields.
if child_is_hidden && child_symbol.is_non_terminal() {
let child_variable_info = &result[child_symbol.index];
did_change |= extend_sorted(
&mut field_info.types,
&child_variable_info.children.types,
);
production_field_quantity.append(child_variable_info.children.quantity);
} else {
production_field_quantity.append(ChildQuantity::one());
}
did_change |= sorted_vec_insert(&mut children_info.types, &child_type);
}
// Maintain the set of named children without fields within this variable.
else if child_type_is_named(&child_type) {
production_children_without_fields_quantity.append(ChildQuantity::one());
did_change |= extend_sorted(
&mut variable_info.children_without_fields.types,
Some(&child_type),
);
}
// Inherit information from any hidden children.
if child_symbol.is_non_terminal()
&& !syntax_grammar.supertype_symbols.contains(&child_symbol)
&& step.alias.is_none()
&& !child_type_is_visible(&child_type)
{
// Inherit all child information from hidden children.
if child_is_hidden && child_symbol.is_non_terminal() {
let child_variable_info = &result[child_symbol.index];
// If a hidden child can have multiple children, then this
// node can appear to have multiple children.
// If a hidden child can have multiple children, then its parent node can
// appear to have multiple children.
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
}
// Inherit fields from this hidden child
// If a hidden child has fields, then the parent node can appear to have
// those same fields.
for (field_name, child_field_info) in &child_variable_info.fields {
field_quantities
production_field_quantities
.entry(field_name)
.or_insert(ChildQuantity::zero())
.append(child_field_info.quantity);
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo {
types: Vec::new(),
quantity: ChildQuantity::one(),
});
for child_type in &child_field_info.types {
sorted_vec_insert(&mut field_info.types, &child_type);
}
did_change |= extend_sorted(
&mut variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo::default())
.types,
&child_field_info.types,
);
}
// Inherit child types from this hidden child
for child_type in &child_variable_info.child_types {
did_change |=
sorted_vec_insert(&mut variable_info.child_types, child_type);
}
// If a hidden child has children, then the parent node can appear to have
// those same children.
production_children_quantity.append(child_variable_info.children.quantity);
did_change |= extend_sorted(
&mut variable_info.children.types,
&child_variable_info.children.types,
);
// If any field points to this hidden child, inherit child types
// for the field.
if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &child_variable_info.child_types {
did_change |= sorted_vec_insert(&mut field_info.types, &child_type);
}
}
// Inherit info about children without fields from this hidden child.
else {
// If a hidden child can have named children without fields, then the parent
// node can appear to have those same children.
if step.field_name.is_none() {
let grandchildren_info = &child_variable_info.children_without_fields;
if !grandchildren_info.types.is_empty() {
children_without_fields_quantity
.append(grandchildren_info.quantity);
if variable_info.children_without_fields.types.is_empty() {
variable_info.children_without_fields.quantity =
ChildQuantity::one();
}
for child_type in &grandchildren_info.types {
did_change |= sorted_vec_insert(
&mut variable_info.children_without_fields.types,
&child_type,
);
}
production_children_without_fields_quantity
.append(child_variable_info.children_without_fields.quantity);
did_change |= extend_sorted(
&mut variable_info.children_without_fields.types,
&child_variable_info.children_without_fields.types,
);
}
}
}
@ -302,22 +289,27 @@ pub(crate) fn get_variable_info(
// Note whether or not this production contains children whose summaries
// have not yet been computed.
if child_symbol.index >= i && !all_initialized {
has_uninitialized_invisible_children = true;
production_has_uninitialized_invisible_children = true;
}
}
// If this production's children all have had their summaries initialized,
// then expand the quantity information with all of the possibilities introduced
// by this production.
if !has_uninitialized_invisible_children {
if !production_has_uninitialized_invisible_children {
did_change |= variable_info
.children
.quantity
.union(production_children_quantity);
did_change |= variable_info
.children_without_fields
.quantity
.union(children_without_fields_quantity);
.union(production_children_without_fields_quantity);
for (field_name, info) in variable_info.fields.iter_mut() {
did_change |= info.quantity.union(
field_quantities
production_field_quantities
.get(field_name)
.cloned()
.unwrap_or(ChildQuantity::zero()),
@ -352,13 +344,15 @@ pub(crate) fn get_variable_info(
// Update all of the node type lists to eliminate hidden nodes.
for supertype_symbol in &syntax_grammar.supertype_symbols {
result[supertype_symbol.index]
.child_types
.children
.types
.retain(child_type_is_visible);
}
for variable_info in result.iter_mut() {
for (_, field_info) in variable_info.fields.iter_mut() {
field_info.types.retain(child_type_is_visible);
}
variable_info.fields.retain(|_, v| !v.types.is_empty());
variable_info
.children_without_fields
.types
@ -467,7 +461,8 @@ pub(crate) fn generate_node_types_json(
subtypes: None,
});
let mut subtypes = info
.child_types
.children
.types
.iter()
.map(child_type_to_node_type)
.collect::<Vec<_>>();
@ -686,16 +681,19 @@ fn variable_type_for_child_type(
}
}
fn sorted_vec_insert<T>(vec: &mut Vec<T>, value: &T) -> bool
fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool
where
T: Clone + Eq + Ord,
T: 'a,
{
if let Err(i) = vec.binary_search(&value) {
vec.insert(i, value.clone());
true
} else {
false
}
values.into_iter().any(|value| {
if let Err(i) = vec.binary_search(&value) {
vec.insert(i, value.clone());
true
} else {
false
}
})
}
#[cfg(test)]
@ -1177,6 +1175,38 @@ mod tests {
);
}
#[test]
fn test_node_types_with_fields_on_hidden_tokens() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
supertype_symbols: vec![],
variables: vec![Variable {
name: "script".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("a".to_string(), Rule::pattern("hi")),
Rule::field("b".to_string(), Rule::pattern("bye")),
]),
}],
});
assert_eq!(
node_types,
[NodeInfoJSON {
kind: "script".to_string(),
named: true,
fields: Some(BTreeMap::new()),
children: None,
subtypes: None
}]
);
}
#[test]
fn test_node_types_with_multiple_rules_same_alias_name() {
let node_types = get_node_types(InputGrammar {
@ -1461,6 +1491,71 @@ mod tests {
);
}
#[test]
fn test_get_variable_info_with_repetitions_inside_fields() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
// Field associated with a repetition.
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1")],
},
Production {
dynamic_precedence: 0,
steps: vec![],
},
],
},
// Repetition node
SyntaxVariable {
name: "_rule0_repeat".to_string(),
kind: VariableType::Hidden,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(1))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::non_terminal(1)),
],
},
],
},
],
vec![],
),
&build_lexical_grammar(),
&AliasMap::new(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
quantity: ChildQuantity {
exists: true,
required: false,
multiple: true,
},
types: vec![ChildType::Normal(Symbol::terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
#[test]
fn test_get_variable_info_with_inherited_fields() {
let variable_info = get_variable_info(

View file

@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder {
last_inserted_step.associativity = removed_step.associativity;
}
}
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
production.dynamic_precedence = p.dynamic_precedence;
}
production
}),
);
@ -226,7 +229,7 @@ mod tests {
],
},
Production {
dynamic_precedence: 0,
dynamic_precedence: -2,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
@ -258,7 +261,7 @@ mod tests {
],
},
Production {
dynamic_precedence: 0,
dynamic_precedence: -2,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(14)),

View file

@ -160,7 +160,9 @@ impl Loader {
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
let file_contents = fs::read_to_string(path)?;
let file_contents = fs::read(path)
.map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?;
let file_contents = String::from_utf8_lossy(&file_contents);
let mut best_score = -2isize;
let mut best_configuration_id = None;
for configuration_id in configuration_ids {

View file

@ -53,11 +53,12 @@ fn run() -> error::Result<()> {
.subcommand(
SubCommand::with_name("parse")
.about("Parse files")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(true),
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
@ -79,37 +80,33 @@ fn run() -> error::Result<()> {
SubCommand::with_name("query")
.about("Search files using a syntax tree query")
.arg(Arg::with_name("query-path").index(1).required(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(2)
.multiple(true)
.required(true),
.required(false),
)
.arg(
Arg::with_name("byte-range")
.help("The range of byte offsets in which the query will be executed")
.long("byte-range")
.takes_value(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("captures").long("captures").short("c")),
)
.subcommand(
SubCommand::with_name("tags")
.arg(
Arg::with_name("format")
.short("f")
.long("format")
.value_name("json|protobuf")
.help("Determine output format (default: json)"),
)
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("inputs")
Arg::with_name("paths")
.help("The source file to use")
.index(1)
.required(true)
.multiple(true),
)
.arg(
Arg::with_name("v")
.short("v")
.multiple(true)
.help("Sets the level of verbosity"),
),
)
.subcommand(
@ -127,11 +124,12 @@ fn run() -> error::Result<()> {
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(true),
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("h"))
@ -230,7 +228,9 @@ fn run() -> error::Result<()> {
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
let paths = collect_paths(matches.values_of("path").unwrap())?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap();
let mut has_error = false;
loader.find_all_languages(&config.parser_directories)?;
@ -256,31 +256,36 @@ fn run() -> error::Result<()> {
}
} else if let Some(matches) = matches.subcommand_matches("query") {
let ordered_captures = matches.values_of("captures").is_some();
let paths = matches
.values_of("path")
.unwrap()
.into_iter()
.map(Path::new)
.collect::<Vec<&Path>>();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
loader.find_all_languages(&config.parser_directories)?;
let language = select_language(
&mut loader,
paths[0],
Path::new(&paths[0]),
&current_dir,
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
query::query_files_at_paths(language, paths, query_path, ordered_captures)?;
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
(r[0].parse().unwrap(), r[1].parse().unwrap())
});
query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?;
} else if let Some(matches) = matches.subcommand_matches("tags") {
loader.find_all_languages(&config.parser_directories)?;
let paths = collect_paths(matches.values_of("inputs").unwrap())?;
tags::generate_tags(&loader, matches.value_of("scope"), &paths)?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
tags::generate_tags(
&loader,
matches.value_of("scope"),
&paths,
matches.is_present("quiet"),
matches.is_present("time"),
)?;
} else if let Some(matches) = matches.subcommand_matches("highlight") {
loader.configure_highlights(&config.theme.highlight_names);
loader.find_all_languages(&config.parser_directories)?;
let time = matches.is_present("time");
let paths = collect_paths(matches.values_of("path").unwrap())?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let html_mode = matches.is_present("html");
if html_mode {
println!("{}", highlight::HTML_HEADER);
@ -353,39 +358,58 @@ fn run() -> error::Result<()> {
Ok(())
}
fn collect_paths<'a>(paths: impl Iterator<Item = &'a str>) -> error::Result<Vec<String>> {
let mut result = Vec::new();
fn collect_paths<'a>(
paths_file: Option<&str>,
paths: Option<impl Iterator<Item = &'a str>>,
) -> error::Result<Vec<String>> {
if let Some(paths_file) = paths_file {
return Ok(fs::read_to_string(paths_file)
.map_err(Error::wrap(|| {
format!("Failed to read paths file {}", paths_file)
}))?
.trim()
.split_ascii_whitespace()
.map(String::from)
.collect::<Vec<_>>());
}
let mut incorporate_path = |path: &str, positive| {
if positive {
result.push(path.to_string());
} else {
if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
if let Some(paths) = paths {
let mut result = Vec::new();
let mut incorporate_path = |path: &str, positive| {
if positive {
result.push(path.to_string());
} else {
if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
}
}
}
};
};
for mut path in paths {
let mut positive = true;
if path.starts_with("!") {
positive = false;
path = path.trim_start_matches("!");
}
for mut path in paths {
let mut positive = true;
if path.starts_with("!") {
positive = false;
path = path.trim_start_matches("!");
}
if Path::new(path).exists() {
incorporate_path(path, positive);
} else {
let paths =
glob(path).map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
for path in paths {
if let Some(path) = path?.to_str() {
incorporate_path(path, positive);
if Path::new(path).exists() {
incorporate_path(path, positive);
} else {
let paths = glob(path)
.map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
for path in paths {
if let Some(path) = path?.to_str() {
incorporate_path(path, positive);
}
}
}
}
return Ok(result);
}
Ok(result)
Err(Error::new("Must provide one or more paths".to_string()))
}
fn select_language(

View file

@ -6,9 +6,10 @@ use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
pub fn query_files_at_paths(
language: Language,
paths: Vec<&Path>,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
range: Option<(usize, usize)>,
) -> Result<()> {
let stdout = io::stdout();
let mut stdout = stdout.lock();
@ -20,14 +21,17 @@ pub fn query_files_at_paths(
.map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?;
let mut query_cursor = QueryCursor::new();
if let Some((beg, end)) = range {
query_cursor.set_byte_range(beg, end);
}
let mut parser = Parser::new();
parser.set_language(language).map_err(|e| e.to_string())?;
for path in paths {
writeln!(&mut stdout, "{}", path.to_str().unwrap())?;
writeln!(&mut stdout, "{}", path)?;
let source_code = fs::read(path).map_err(Error::wrap(|| {
let source_code = fs::read(&path).map_err(Error::wrap(|| {
format!("Error reading source file {:?}", path)
}))?;
let text_callback = |n: Node| &source_code[n.byte_range()];

View file

@ -3,10 +3,17 @@ use super::util;
use crate::error::{Error, Result};
use std::io::{self, Write};
use std::path::Path;
use std::time::Instant;
use std::{fs, str};
use tree_sitter_tags::TagsContext;
pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> {
pub fn generate_tags(
loader: &Loader,
scope: Option<&str>,
paths: &[String],
quiet: bool,
time: bool,
) -> Result<()> {
let mut lang = None;
if let Some(scope) = scope {
lang = loader.language_configuration_for_scope(scope)?;
@ -34,28 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) ->
};
if let Some(tags_config) = language_config.tags_config(language)? {
let path_str = format!("{:?}", path);
writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?;
let indent;
if paths.len() > 1 {
if !quiet {
writeln!(&mut stdout, "{}", path.to_string_lossy())?;
}
indent = "\t"
} else {
indent = "";
};
let source = fs::read(path)?;
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? {
let t0 = Instant::now();
for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))?.0 {
let tag = tag?;
write!(
&mut stdout,
" {:<8} {:<40}\t{:>9}-{:<9}",
tag.kind,
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
tag.span.start,
tag.span.end,
)?;
if let Some(docs) = tag.docs {
if docs.len() > 120 {
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
} else {
write!(&mut stdout, "\t{:?}", &docs)?;
if !quiet {
write!(
&mut stdout,
"{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
indent,
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
&tags_config.syntax_type_name(tag.syntax_type_id),
if tag.is_definition { "def" } else { "ref" },
tag.span.start,
tag.span.end,
str::from_utf8(&source[tag.line_range]).unwrap_or(""),
)?;
if let Some(docs) = tag.docs {
if docs.len() > 120 {
write!(&mut stdout, "\t{:?}...", &docs[0..120])?;
} else {
write!(&mut stdout, "\t{:?}", &docs)?;
}
}
writeln!(&mut stdout, "")?;
}
writeln!(&mut stdout, "")?;
}
if time {
writeln!(
&mut stdout,
"{}time: {}ms",
indent,
t0.elapsed().as_millis(),
)?;
}
} else {
eprintln!("No tags config found for path {:?}", path);

View file

@ -3,6 +3,7 @@ mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;

View file

@ -0,0 +1,15 @@
use super::helpers::allocations;
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_pathological_example_1() {
let language = "cpp";
let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
allocations::record(|| {
let mut parser = Parser::new();
parser.set_language(get_language(language)).unwrap();
parser.parse(source, None).unwrap();
});
}

View file

@ -408,7 +408,7 @@ fn test_query_matches_with_many_overlapping_results() {
)
.unwrap();
let count = 80;
let count = 1024;
// Deeply nested chained function calls:
// a
@ -573,8 +573,8 @@ fn test_query_matches_with_immediate_siblings() {
&[
(0, vec![("parent", "a"), ("child", "b")]),
(0, vec![("parent", "b"), ("child", "c")]),
(1, vec![("last-child", "d")]),
(0, vec![("parent", "c"), ("child", "d")]),
(1, vec![("last-child", "d")]),
(2, vec![("first-element", "w")]),
(2, vec![("first-element", "1")]),
],
@ -758,6 +758,55 @@ fn test_query_matches_with_nested_repetitions() {
});
}
#[test]
fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() {
allocations::record(|| {
let language = get_language("javascript");
// When this query sees a comment, it must keep track of several potential
// matches: up to two for each pattern that begins with a comment.
let query = Query::new(
language,
r#"
(call_expression
function: (member_expression
property: (property_identifier) @name)) @ref.method
((comment)* @doc (function_declaration))
((comment)* @doc (generator_function_declaration))
((comment)* @doc (class_declaration))
((comment)* @doc (lexical_declaration))
((comment)* @doc (variable_declaration))
((comment)* @doc (method_definition))
(comment) @comment
"#,
)
.unwrap();
// Here, a series of comments occurs in the middle of a match of the first
// pattern. To avoid exceeding the storage limits and discarding that outer
// match, the comment-related matches need to be managed efficiently.
let source = format!(
"theObject\n{}\n.theMethod()",
" // the comment\n".repeat(64)
);
assert_query_matches(
language,
&query,
&source,
&vec![(7, vec![("comment", "// the comment")]); 64]
.into_iter()
.chain(vec![(
0,
vec![("ref.method", source.as_str()), ("name", "theMethod")],
)])
.collect::<Vec<_>>(),
);
});
}
#[test]
fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() {
allocations::record(|| {
@ -1161,6 +1210,43 @@ fn test_query_matches_with_too_many_permutations_to_track() {
});
}
#[test]
fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
"
(
(comment) @doc
; not immediate
(class_declaration) @class
)
(call_expression
function: [
(identifier) @function
(member_expression property: (property_identifier) @method)
])
",
)
.unwrap();
let source = "/* hi */ a.b(); ".repeat(50);
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(&source));
assert_eq!(
collect_matches(matches, &query, source.as_str()),
vec![(1, vec![("method", "b")]); 50],
);
});
}
#[test]
fn test_query_matches_with_anonymous_tokens() {
allocations::record(|| {
@ -1215,6 +1301,45 @@ fn test_query_matches_within_byte_range() {
});
}
#[test]
fn test_query_captures_within_byte_range() {
allocations::record(|| {
let language = get_language("c");
let query = Query::new(
language,
"
(call_expression
function: (identifier) @function
arguments: (argument_list (string_literal) @string.arg))
(string_literal) @string
",
)
.unwrap();
let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#;
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = QueryCursor::new();
let captures =
cursor
.set_byte_range(3, 27)
.captures(&query, tree.root_node(), to_callback(source));
assert_eq!(
collect_captures(captures, &query, source),
&[
("function", "DEFUN"),
("string.arg", "\"safe-length\""),
("string", "\"safe-length\""),
]
);
});
}
#[test]
fn test_query_matches_different_queries_same_cursor() {
allocations::record(|| {
@ -1420,12 +1545,17 @@ fn test_query_captures_with_text_conditions() {
((identifier) @function.builtin
(#eq? @function.builtin "require"))
(identifier) @variable
((identifier) @variable
(#not-match? @variable "^(lambda|load)$"))
"#,
)
.unwrap();
let source = "
toad
load
panda
lambda
const ab = require('./ab');
new Cd(EF);
";
@ -1439,6 +1569,8 @@ fn test_query_captures_with_text_conditions() {
assert_eq!(
collect_captures(captures, &query, source),
&[
("variable", "toad"),
("variable", "panda"),
("variable", "ab"),
("function.builtin", "require"),
("variable", "require"),
@ -2074,6 +2206,39 @@ fn test_query_disable_pattern() {
});
}
#[test]
fn test_query_alternative_predicate_prefix() {
allocations::record(|| {
let language = get_language("c");
let query = Query::new(
language,
r#"
((call_expression
function: (identifier) @keyword
arguments: (argument_list
(string_literal) @function))
(.eq? @keyword "DEFUN"))
"#,
)
.unwrap();
let source = r#"
DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0,
doc: /* Return the argument unchanged. */
attributes: const)
(Lisp_Object arg)
{
return arg;
}
"#;
assert_query_matches(
language,
&query,
source,
&[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])],
);
});
}
#[test]
fn test_query_is_definite() {
struct Row {
@ -2086,10 +2251,7 @@ fn test_query_is_definite() {
Row {
language: get_language("python"),
pattern: r#"(expression_statement (string))"#,
results_by_symbol: &[
("expression_statement", false),
("string", false),
],
results_by_symbol: &[("expression_statement", false), ("string", false)],
},
Row {
language: get_language("javascript"),
@ -2102,30 +2264,17 @@ fn test_query_is_definite() {
Row {
language: get_language("javascript"),
pattern: r#"(object "{" "}")"#,
results_by_symbol: &[
("object", false),
("{", true),
("}", true),
],
results_by_symbol: &[("object", false), ("{", true), ("}", true)],
},
Row {
language: get_language("javascript"),
pattern: r#"(pair (property_identifier) ":")"#,
results_by_symbol: &[
("pair", false),
("property_identifier", false),
(":", true),
],
results_by_symbol: &[("pair", false), ("property_identifier", false), (":", true)],
},
Row {
language: get_language("javascript"),
pattern: r#"(object "{" (_) "}")"#,
results_by_symbol: &[
("object", false),
("{", false),
("", false),
("}", true),
],
results_by_symbol: &[("object", false), ("{", false), ("", false), ("}", true)],
},
Row {
language: get_language("javascript"),

View file

@ -1,73 +1,81 @@
use super::helpers::allocations;
use super::helpers::fixtures::{get_language, get_language_queries_path};
use std::ffi::CStr;
use std::ffi::CString;
use std::{fs, ptr, slice, str};
use tree_sitter::Point;
use tree_sitter_tags::c_lib as c;
use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext};
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @function
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @class
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @class
(#select-adjacent! @doc @class)
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @method
(#select-adjacent! @doc @method)
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @function
(#select-adjacent! @doc @function)
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @call
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
(method
name: (identifier) @name) @method
name: (_) @name) @definition.method
(method_call
method: (identifier) @name) @call
method: (identifier) @name) @reference.call
((identifier) @name @call
(setter (identifier) @ignore)
((identifier) @name @reference.call
(#is-not? local))
"#;
@ -94,25 +102,26 @@ fn test_tags_python() {
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), t.kind))
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", TagKind::Class),
("age", TagKind::Function),
("compute_age", TagKind::Call),
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), " class Customer:");
assert_eq!(
substr(source, &tags[1].line_range),
" def age(self):"
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
@ -145,17 +154,22 @@ fn test_tags_javascript() {
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), t.kind))
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", TagKind::Class),
("getAge", TagKind::Method),
("Agent", TagKind::Class)
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
@ -166,6 +180,27 @@ fn test_tags_javascript() {
assert_eq!(tags[2].docs, None);
}
#[test]
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
fn test_tags_ruby() {
let language = get_language("ruby");
@ -177,7 +212,7 @@ fn test_tags_ruby() {
"
b = 1
def foo()
def foo=()
c = 1
# a is a method because it is not in scope
@ -197,6 +232,7 @@ fn test_tags_ruby() {
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
@ -204,18 +240,18 @@ fn test_tags_ruby() {
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
t.kind,
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo", TagKind::Method, (2, 0)),
("bar", TagKind::Call, (7, 4)),
("a", TagKind::Call, (7, 8)),
("b", TagKind::Call, (7, 11)),
("each", TagKind::Call, (9, 14)),
("baz", TagKind::Call, (13, 8)),
("b", TagKind::Call, (13, 15),),
("foo=", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
@ -239,7 +275,7 @@ fn test_tags_cancellation() {
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
.unwrap();
for (i, tag) in tags.enumerate() {
for (i, tag) in tags.0.enumerate() {
if i == 150 {
cancellation_flag.store(1, Ordering::SeqCst);
}
@ -253,6 +289,47 @@ fn test_tags_cancellation() {
});
}
#[test]
fn test_invalid_capture() {
let language = get_language("python");
let e = TagsConfiguration::new(language, "(identifier) @method", "")
.expect_err("expected InvalidCapture error");
assert_eq!(e, Error::InvalidCapture("method".to_string()));
}
#[test]
fn test_tags_with_parse_error() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br#"
class Fine: pass
class Bad
"#;
let (tags, failed) = tag_context
.generate_tags(&tags_config, source, None)
.unwrap();
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
assert!(failed, "syntax error should have been detected");
assert_eq!(
newtags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Fine", "class"),
]
);
}
#[test]
fn test_tags_via_c_api() {
allocations::record(|| {
@ -316,29 +393,29 @@ fn test_tags_via_c_api() {
})
.unwrap();
let syntax_types: Vec<&str> = unsafe {
let mut len: u32 = 0;
let ptr =
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect()
};
assert_eq!(
tags.iter()
.map(|tag| (
tag.kind,
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
(
c::TSTagKind::Function,
"b",
"function b() {",
"one\ntwo\nthree"
),
(
c::TSTagKind::Class,
"C",
"class C extends D {",
"four\nfive"
),
(c::TSTagKind::Call, "b", "b(a);", "")
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);

View file

@ -1,3 +1,4 @@
use super::error::{Error, Result};
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
@ -31,12 +32,12 @@ pub struct LogSession();
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
@ -46,11 +47,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.expect("Failed to run Dot");
.map_err(Error::wrap(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
}))?;
let dot_stdin = dot_process
.stdin
.take()
.expect("Failed to open stdin for Dot");
.ok_or_else(|| Error::new("Failed to open stdin for `dot` process.".to_string()))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),