Merge pull request #738 from tree-sitter/supertype-queries

Allow tree queries to match on nodes' supertypes
This commit is contained in:
Max Brunsfeld 2020-09-23 12:19:04 -07:00 committed by GitHub
commit 4d99e23946
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 219 additions and 68 deletions

View file

@ -68,6 +68,7 @@ impl<'a> Minimizer<'a> {
..
} => {
if !self.simple_aliases.contains_key(&symbol)
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
&& !aliased_symbols.contains(&symbol)
&& self.syntax_grammar.variables[symbol.index].kind
!= VariableType::Named

View file

@ -325,15 +325,8 @@ pub(crate) fn get_variable_info(
}
for supertype_symbol in &syntax_grammar.supertype_symbols {
let variable = &syntax_grammar.variables[supertype_symbol.index];
if variable.kind != VariableType::Hidden {
return Err(Error::grammar(&format!(
"Supertype symbols must be hidden, but `{}` is not",
variable.name
)));
}
if result[supertype_symbol.index].has_multi_step_production {
let variable = &syntax_grammar.variables[supertype_symbol.index];
return Err(Error::grammar(&format!(
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
variable.name

View file

@ -73,6 +73,12 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
);
}
for (i, variable) in variables.iter_mut().enumerate() {
if supertype_symbols.contains(&Symbol::non_terminal(i)) {
variable.kind = VariableType::Hidden;
}
}
Ok(InternedGrammar {
variables,
external_tokens,

View file

@ -460,6 +460,9 @@ impl Generator {
VariableType::Hidden => {
add_line!(self, ".visible = false,");
add_line!(self, ".named = true,");
if self.syntax_grammar.supertype_symbols.contains(symbol) {
add_line!(self, ".supertype = true,");
}
}
VariableType::Auxiliary => {
add_line!(self, ".visible = false,");

View file

@ -291,6 +291,24 @@ fn test_query_errors_on_impossible_patterns() {
.join("\n")
))
);
Query::new(
js_lang,
"(if_statement
condition: (parenthesized_expression (_expression) @cond))",
)
.unwrap();
assert_eq!(
Query::new(js_lang, "(if_statement condition: (_expression))",),
Err(QueryError::Structure(
1,
[
"(if_statement condition: (_expression))", //
" ^",
]
.join("\n")
))
);
});
}
@ -701,7 +719,6 @@ fn test_query_matches_with_immediate_siblings() {
(2, vec![("last-stmt", "g()")]),
],
);
});
}
@ -1395,6 +1412,48 @@ fn test_query_matches_with_anonymous_tokens() {
});
}
/// Checks that query patterns can refer to supertype nodes (such as
/// `_expression`), and that the `supertype/subtype` form restricts a match
/// to one concrete node type appearing under that supertype.
#[test]
fn test_query_matches_with_supertypes() {
    allocations::record(|| {
        let python = get_language("python");

        // Four patterns; the expected matches below refer to them by their
        // zero-based position in this string.
        let query_source = r#"
(argument_list (_expression) @arg)
(keyword_argument
value: (_expression) @kw_arg)
(assignment
left: (left_hand_side (identifier) @var_def))
(_primary_expression/identifier) @var_ref
"#;
        let query = Query::new(python, query_source).unwrap();

        // The program to run the query against.
        let program = "
a = b.c(
[d],
# a comment
e=f
)
";

        // Each entry pairs a pattern position with the captures it yields.
        let expected = [
            (2, vec![("var_def", "a")]),
            (3, vec![("var_ref", "b")]),
            (0, vec![("arg", "[d]")]),
            (3, vec![("var_ref", "d")]),
            (1, vec![("kw_arg", "f")]),
            (3, vec![("var_ref", "f")]),
        ];

        assert_query_matches(python, &query, program, &expected);
    });
}
#[test]
fn test_query_matches_within_byte_range() {
allocations::record(|| {

View file

@ -35,6 +35,7 @@ typedef uint16_t TSStateId;
// Per-symbol flags, packed into single-bit fields.
typedef struct {
// Cleared for hidden and auxiliary symbols. Name lookups skip symbols
// without this flag unless `supertype` is set.
bool visible : 1;
// Compared against the caller's `is_named` argument during name lookups.
bool named : 1;
// Set for hidden symbols that the grammar lists as supertypes
// (presumably by the parser generator -- confirm against the generated tables).
bool supertype: 1;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;

View file

@ -89,7 +89,7 @@ TSSymbol ts_language_symbol_for_name(
uint32_t count = ts_language_symbol_count(self);
for (TSSymbol i = 0; i < count; i++) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
if (!metadata.visible || metadata.named != is_named) continue;
if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
const char *symbol_name = self->symbol_names[i];
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {

View file

@ -47,6 +47,7 @@ typedef struct {
*/
typedef struct {
TSSymbol symbol;
TSSymbol supertype_symbol;
TSFieldId field;
uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
uint16_t alternative_index;
@ -638,6 +639,13 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
return &self->stack[self->depth - 1];
}
// Report whether any entry currently on the analysis stack has `symbol`
// as its parent symbol. Only the first `depth` entries are examined.
static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
  unsigned level = 0;
  while (level < self->depth) {
    AnalysisStateEntry *entry = &self->stack[level];
    if (entry->parent_symbol == symbol) {
      return true;
    }
    level++;
  }
  return false;
}
/***********************
* AnalysisSubgraphNode
***********************/
@ -1133,6 +1141,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
if (step->field && step->field != field_id) {
does_match = false;
}
if (
step->supertype_symbol &&
!analysis_state__has_supertype(state, step->supertype_symbol)
) does_match = false;
}
// If this is a hidden child, then push a new entry to the stack, in order to
@ -1626,14 +1638,9 @@ static TSQueryError ts_query__parse_pattern(
else {
TSSymbol symbol;
// Parse the wildcard symbol
if (
stream->next == '_' ||
// TODO - remove.
// For temporary backward compatibility, handle '*' as a wildcard.
stream->next == '*'
) {
// TODO - remove.
// For temporary backward compatibility, handle '*' as a wildcard.
if (stream->next == '*') {
symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
stream_advance(stream);
}
@ -1651,15 +1658,22 @@ static TSQueryError ts_query__parse_pattern(
return ts_query__parse_predicate(self, stream);
}
symbol = ts_language_symbol_for_name(
self->language,
node_name,
length,
true
);
if (!symbol) {
stream_reset(stream, node_name);
return TSQueryErrorNodeType;
// Parse the wildcard symbol
else if (length == 1 && node_name[0] == '_') {
symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
}
else {
symbol = ts_language_symbol_for_name(
self->language,
node_name,
length,
true
);
if (!symbol) {
stream_reset(stream, node_name);
return TSQueryErrorNodeType;
}
}
} else {
return TSQueryErrorSyntax;
@ -1667,9 +1681,38 @@ static TSQueryError ts_query__parse_pattern(
// Add a step for the node.
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
if (ts_language_symbol_metadata(self->language, symbol).supertype) {
QueryStep *step = array_back(&self->steps);
step->supertype_symbol = step->symbol;
step->symbol = NAMED_WILDCARD_SYMBOL;
}
stream_skip_whitespace(stream);
if (stream->next == '/') {
stream_advance(stream);
if (!stream_is_ident_start(stream)) {
return TSQueryErrorSyntax;
}
const char *node_name = stream->input;
stream_scan_identifier(stream);
uint32_t length = stream->input - node_name;
QueryStep *step = array_back(&self->steps);
step->symbol = ts_language_symbol_for_name(
self->language,
node_name,
length,
true
);
if (!step->symbol) {
stream_reset(stream, node_name);
return TSQueryErrorNodeType;
}
}
// Parse the child patterns
stream_skip_whitespace(stream);
bool child_is_immediate = false;
uint16_t child_start_step_index = self->steps.size;
for (;;) {
@ -2552,11 +2595,17 @@ static inline bool ts_query_cursor__advance(
bool has_later_siblings;
bool has_later_named_siblings;
bool can_have_later_siblings_with_this_field;
TSFieldId field_id = ts_tree_cursor_current_status(
TSFieldId field_id = 0;
TSSymbol supertypes[8] = {0};
unsigned supertype_count = 8;
ts_tree_cursor_current_status(
&self->cursor,
&field_id,
&has_later_siblings,
&has_later_named_siblings,
&can_have_later_siblings_with_this_field
&can_have_later_siblings_with_this_field,
supertypes,
&supertype_count
);
LOG(
"enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
@ -2575,6 +2624,7 @@ static inline bool ts_query_cursor__advance(
// If this node matches the first step of the pattern, then add a new
// state at the start of this pattern.
if (step->field && field_id != step->field) continue;
if (step->supertype_symbol && !supertype_count) continue;
ts_query_cursor__add_state(self, pattern);
}
@ -2622,6 +2672,16 @@ static inline bool ts_query_cursor__advance(
if (step->is_last_child && has_later_named_siblings) {
node_does_match = false;
}
if (step->supertype_symbol) {
bool has_supertype = false;
for (unsigned j = 0; j < supertype_count; j++) {
if (supertypes[j] == step->supertype_symbol) {
has_supertype = true;
break;
}
}
if (!has_supertype) node_does_match = false;
}
if (step->field) {
if (step->field == field_id) {
if (!can_have_later_siblings_with_this_field) {

View file

@ -246,14 +246,19 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
// Private - Get various facts about the current node that are needed
// when executing tree queries.
TSFieldId ts_tree_cursor_current_status(
void ts_tree_cursor_current_status(
const TSTreeCursor *_self,
TSFieldId *field_id,
bool *has_later_siblings,
bool *has_later_named_siblings,
bool *can_have_later_siblings_with_this_field
bool *can_have_later_siblings_with_this_field,
TSSymbol *supertypes,
unsigned *supertype_count
) {
const TreeCursor *self = (const TreeCursor *)_self;
TSFieldId result = 0;
unsigned max_supertypes = *supertype_count;
*field_id = 0;
*supertype_count = 0;
*has_later_siblings = false;
*has_later_named_siblings = false;
*can_have_later_siblings_with_this_field = false;
@ -269,24 +274,31 @@ TSFieldId ts_tree_cursor_current_status(
parent_entry->subtree->ptr->production_id
);
// If the subtree is visible, return its public-facing symbol.
// Otherwise, return zero.
#define subtree_visible_symbol(subtree, structural_child_index) \
(( \
!ts_subtree_extra(subtree) && \
alias_sequence && \
alias_sequence[structural_child_index] \
) ? \
alias_sequence[structural_child_index] : \
ts_subtree_visible(subtree) ? \
ts_subtree_symbol(subtree) : \
0) \
#define subtree_symbol(subtree, structural_child_index) \
(( \
!ts_subtree_extra(subtree) && \
alias_sequence && \
alias_sequence[structural_child_index] \
) ? \
alias_sequence[structural_child_index] : \
ts_subtree_symbol(subtree))
// Stop walking up when a visible ancestor is found.
if (
i != self->stack.size - 1 &&
subtree_visible_symbol(*entry->subtree, entry->structural_child_index)
) break;
TSSymbol entry_symbol = subtree_symbol(
*entry->subtree,
entry->structural_child_index
);
TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
self->tree->language,
entry_symbol
);
if (i != self->stack.size - 1 && entry_metadata.visible) break;
// Record any supertypes
if (entry_metadata.supertype && *supertype_count < max_supertypes) {
supertypes[*supertype_count] = entry_symbol;
(*supertype_count)++;
}
// Determine if the current node has later siblings.
if (!*has_later_siblings) {
@ -295,19 +307,21 @@ TSFieldId ts_tree_cursor_current_status(
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
Subtree sibling = parent_entry->subtree->ptr->children[j];
if (ts_subtree_visible_child_count(sibling) > 0) {
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
self->tree->language,
subtree_symbol(sibling, structural_child_index)
);
if (sibling_metadata.visible) {
*has_later_siblings = true;
if (*has_later_named_siblings) break;
if (sibling.ptr->named_child_count > 0) {
if (sibling_metadata.named) {
*has_later_named_siblings = true;
break;
}
}
TSSymbol visible_symbol = subtree_visible_symbol(sibling, structural_child_index);
if (visible_symbol) {
} else if (ts_subtree_visible_child_count(sibling) > 0) {
*has_later_siblings = true;
if (*has_later_named_siblings) break;
if (ts_language_symbol_metadata(self->tree->language, visible_symbol).named) {
if (sibling.ptr->named_child_count > 0) {
*has_later_named_siblings = true;
break;
}
@ -316,7 +330,7 @@ TSFieldId ts_tree_cursor_current_status(
}
}
#undef subtree_visible_symbol
#undef subtree_metadata
if (!ts_subtree_extra(*entry->subtree)) {
const TSFieldMapEntry *field_map, *field_map_end;
@ -327,10 +341,10 @@ TSFieldId ts_tree_cursor_current_status(
);
// Look for a field name associated with the current node.
if (!result) {
if (!*field_id) {
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (!i->inherited && i->child_index == entry->structural_child_index) {
result = i->field_id;
*field_id = i->field_id;
*can_have_later_siblings_with_this_field = false;
break;
}
@ -338,9 +352,9 @@ TSFieldId ts_tree_cursor_current_status(
}
// Determine if the current node can have later siblings with the same field name.
if (result) {
if (*field_id) {
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (i->field_id == result && i->child_index > entry->structural_child_index) {
if (i->field_id == *field_id && i->child_index > entry->structural_child_index) {
*can_have_later_siblings_with_this_field = true;
break;
}
@ -348,8 +362,6 @@ TSFieldId ts_tree_cursor_current_status(
}
}
}
return result;
}
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {

View file

@ -16,6 +16,14 @@ typedef struct {
} TreeCursor;
void ts_tree_cursor_init(TreeCursor *, TSNode);
TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *, bool *);
// Private - Get various facts about the cursor's current node that are
// needed when executing tree queries. All results are written through the
// pointer arguments; every output is reset before it is computed.
void ts_tree_cursor_current_status(
const TSTreeCursor *, // cursor whose current node is inspected
TSFieldId *, // out: field_id (0 if no field name was found)
bool *, // out: has_later_siblings
bool *, // out: has_later_named_siblings
bool *, // out: can_have_later_siblings_with_this_field
TSSymbol *, // out: supertypes, filled with supertype symbols found while walking up
unsigned * // in/out: supertype_count -- capacity of the array on entry, entries written on return
);
#endif // TREE_SITTER_TREE_CURSOR_H_

View file

@ -158,9 +158,17 @@ int a() {
(translation_unit
(function_definition
(primitive_type)
(function_declarator (identifier) (parameter_list))
(function_declarator
(identifier)
(parameter_list))
(compound_statement
(struct_specifier (type_identifier))
(ERROR (number_literal))
(primitive_type)
(ERROR (number_literal)))))
(declaration
(struct_specifier (type_identifier))
(init_declarator
(MISSING identifier)
(number_literal)))
(declaration
(primitive_type)
(init_declarator
(MISSING identifier)
(number_literal))))))