Merge pull request #738 from tree-sitter/supertype-queries
Allow tree queries to match on nodes' supertypes
This commit is contained in:
commit
4d99e23946
11 changed files with 219 additions and 68 deletions
|
|
@ -68,6 +68,7 @@ impl<'a> Minimizer<'a> {
|
|||
..
|
||||
} => {
|
||||
if !self.simple_aliases.contains_key(&symbol)
|
||||
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
|
||||
&& !aliased_symbols.contains(&symbol)
|
||||
&& self.syntax_grammar.variables[symbol.index].kind
|
||||
!= VariableType::Named
|
||||
|
|
|
|||
|
|
@ -325,15 +325,8 @@ pub(crate) fn get_variable_info(
|
|||
}
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
let variable = &syntax_grammar.variables[supertype_symbol.index];
|
||||
if variable.kind != VariableType::Hidden {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must be hidden, but `{}` is not",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
|
||||
if result[supertype_symbol.index].has_multi_step_production {
|
||||
let variable = &syntax_grammar.variables[supertype_symbol.index];
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
|
||||
variable.name
|
||||
|
|
|
|||
|
|
@ -73,6 +73,12 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
);
|
||||
}
|
||||
|
||||
for (i, variable) in variables.iter_mut().enumerate() {
|
||||
if supertype_symbols.contains(&Symbol::non_terminal(i)) {
|
||||
variable.kind = VariableType::Hidden;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(InternedGrammar {
|
||||
variables,
|
||||
external_tokens,
|
||||
|
|
|
|||
|
|
@ -460,6 +460,9 @@ impl Generator {
|
|||
VariableType::Hidden => {
|
||||
add_line!(self, ".visible = false,");
|
||||
add_line!(self, ".named = true,");
|
||||
if self.syntax_grammar.supertype_symbols.contains(symbol) {
|
||||
add_line!(self, ".supertype = true,");
|
||||
}
|
||||
}
|
||||
VariableType::Auxiliary => {
|
||||
add_line!(self, ".visible = false,");
|
||||
|
|
|
|||
|
|
@ -291,6 +291,24 @@ fn test_query_errors_on_impossible_patterns() {
|
|||
.join("\n")
|
||||
))
|
||||
);
|
||||
|
||||
Query::new(
|
||||
js_lang,
|
||||
"(if_statement
|
||||
condition: (parenthesized_expression (_expression) @cond))",
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
Query::new(js_lang, "(if_statement condition: (_expression))",),
|
||||
Err(QueryError::Structure(
|
||||
1,
|
||||
[
|
||||
"(if_statement condition: (_expression))", //
|
||||
" ^",
|
||||
]
|
||||
.join("\n")
|
||||
))
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -701,7 +719,6 @@ fn test_query_matches_with_immediate_siblings() {
|
|||
(2, vec![("last-stmt", "g()")]),
|
||||
],
|
||||
);
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -1395,6 +1412,48 @@ fn test_query_matches_with_anonymous_tokens() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_supertypes() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("python");
|
||||
let query = Query::new(
|
||||
language,
|
||||
r#"
|
||||
(argument_list (_expression) @arg)
|
||||
|
||||
(keyword_argument
|
||||
value: (_expression) @kw_arg)
|
||||
|
||||
(assignment
|
||||
left: (left_hand_side (identifier) @var_def))
|
||||
|
||||
(_primary_expression/identifier) @var_ref
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
"
|
||||
a = b.c(
|
||||
[d],
|
||||
# a comment
|
||||
e=f
|
||||
)
|
||||
",
|
||||
&[
|
||||
(2, vec![("var_def", "a")]),
|
||||
(3, vec![("var_ref", "b")]),
|
||||
(0, vec![("arg", "[d]")]),
|
||||
(3, vec![("var_ref", "d")]),
|
||||
(1, vec![("kw_arg", "f")]),
|
||||
(3, vec![("var_ref", "f")]),
|
||||
],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_within_byte_range() {
|
||||
allocations::record(|| {
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ typedef uint16_t TSStateId;
|
|||
typedef struct {
|
||||
bool visible : 1;
|
||||
bool named : 1;
|
||||
bool supertype: 1;
|
||||
} TSSymbolMetadata;
|
||||
|
||||
typedef struct TSLexer TSLexer;
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ TSSymbol ts_language_symbol_for_name(
|
|||
uint32_t count = ts_language_symbol_count(self);
|
||||
for (TSSymbol i = 0; i < count; i++) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
|
||||
if (!metadata.visible || metadata.named != is_named) continue;
|
||||
if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
|
||||
const char *symbol_name = self->symbol_names[i];
|
||||
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
|
||||
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
|
||||
|
|
|
|||
100
lib/src/query.c
100
lib/src/query.c
|
|
@ -47,6 +47,7 @@ typedef struct {
|
|||
*/
|
||||
typedef struct {
|
||||
TSSymbol symbol;
|
||||
TSSymbol supertype_symbol;
|
||||
TSFieldId field;
|
||||
uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
|
||||
uint16_t alternative_index;
|
||||
|
|
@ -638,6 +639,13 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
|
|||
return &self->stack[self->depth - 1];
|
||||
}
|
||||
|
||||
static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
|
||||
for (unsigned i = 0; i < self->depth; i++) {
|
||||
if (self->stack[i].parent_symbol == symbol) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/***********************
|
||||
* AnalysisSubgraphNode
|
||||
***********************/
|
||||
|
|
@ -1133,6 +1141,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|||
if (step->field && step->field != field_id) {
|
||||
does_match = false;
|
||||
}
|
||||
if (
|
||||
step->supertype_symbol &&
|
||||
!analysis_state__has_supertype(state, step->supertype_symbol)
|
||||
) does_match = false;
|
||||
}
|
||||
|
||||
// If this is a hidden child, then push a new entry to the stack, in order to
|
||||
|
|
@ -1626,14 +1638,9 @@ static TSQueryError ts_query__parse_pattern(
|
|||
else {
|
||||
TSSymbol symbol;
|
||||
|
||||
// Parse the wildcard symbol
|
||||
if (
|
||||
stream->next == '_' ||
|
||||
|
||||
// TODO - remove.
|
||||
// For temporary backward compatibility, handle '*' as a wildcard.
|
||||
stream->next == '*'
|
||||
) {
|
||||
// TODO - remove.
|
||||
// For temporary backward compatibility, handle '*' as a wildcard.
|
||||
if (stream->next == '*') {
|
||||
symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
|
||||
stream_advance(stream);
|
||||
}
|
||||
|
|
@ -1651,15 +1658,22 @@ static TSQueryError ts_query__parse_pattern(
|
|||
return ts_query__parse_predicate(self, stream);
|
||||
}
|
||||
|
||||
symbol = ts_language_symbol_for_name(
|
||||
self->language,
|
||||
node_name,
|
||||
length,
|
||||
true
|
||||
);
|
||||
if (!symbol) {
|
||||
stream_reset(stream, node_name);
|
||||
return TSQueryErrorNodeType;
|
||||
// Parse the wildcard symbol
|
||||
else if (length == 1 && node_name[0] == '_') {
|
||||
symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
|
||||
}
|
||||
|
||||
else {
|
||||
symbol = ts_language_symbol_for_name(
|
||||
self->language,
|
||||
node_name,
|
||||
length,
|
||||
true
|
||||
);
|
||||
if (!symbol) {
|
||||
stream_reset(stream, node_name);
|
||||
return TSQueryErrorNodeType;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return TSQueryErrorSyntax;
|
||||
|
|
@ -1667,9 +1681,38 @@ static TSQueryError ts_query__parse_pattern(
|
|||
|
||||
// Add a step for the node.
|
||||
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
|
||||
if (ts_language_symbol_metadata(self->language, symbol).supertype) {
|
||||
QueryStep *step = array_back(&self->steps);
|
||||
step->supertype_symbol = step->symbol;
|
||||
step->symbol = NAMED_WILDCARD_SYMBOL;
|
||||
}
|
||||
|
||||
stream_skip_whitespace(stream);
|
||||
|
||||
if (stream->next == '/') {
|
||||
stream_advance(stream);
|
||||
if (!stream_is_ident_start(stream)) {
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
|
||||
const char *node_name = stream->input;
|
||||
stream_scan_identifier(stream);
|
||||
uint32_t length = stream->input - node_name;
|
||||
|
||||
QueryStep *step = array_back(&self->steps);
|
||||
step->symbol = ts_language_symbol_for_name(
|
||||
self->language,
|
||||
node_name,
|
||||
length,
|
||||
true
|
||||
);
|
||||
if (!step->symbol) {
|
||||
stream_reset(stream, node_name);
|
||||
return TSQueryErrorNodeType;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the child patterns
|
||||
stream_skip_whitespace(stream);
|
||||
bool child_is_immediate = false;
|
||||
uint16_t child_start_step_index = self->steps.size;
|
||||
for (;;) {
|
||||
|
|
@ -2552,11 +2595,17 @@ static inline bool ts_query_cursor__advance(
|
|||
bool has_later_siblings;
|
||||
bool has_later_named_siblings;
|
||||
bool can_have_later_siblings_with_this_field;
|
||||
TSFieldId field_id = ts_tree_cursor_current_status(
|
||||
TSFieldId field_id = 0;
|
||||
TSSymbol supertypes[8] = {0};
|
||||
unsigned supertype_count = 8;
|
||||
ts_tree_cursor_current_status(
|
||||
&self->cursor,
|
||||
&field_id,
|
||||
&has_later_siblings,
|
||||
&has_later_named_siblings,
|
||||
&can_have_later_siblings_with_this_field
|
||||
&can_have_later_siblings_with_this_field,
|
||||
supertypes,
|
||||
&supertype_count
|
||||
);
|
||||
LOG(
|
||||
"enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
|
||||
|
|
@ -2575,6 +2624,7 @@ static inline bool ts_query_cursor__advance(
|
|||
// If this node matches the first step of the pattern, then add a new
|
||||
// state at the start of this pattern.
|
||||
if (step->field && field_id != step->field) continue;
|
||||
if (step->supertype_symbol && !supertype_count) continue;
|
||||
ts_query_cursor__add_state(self, pattern);
|
||||
}
|
||||
|
||||
|
|
@ -2622,6 +2672,16 @@ static inline bool ts_query_cursor__advance(
|
|||
if (step->is_last_child && has_later_named_siblings) {
|
||||
node_does_match = false;
|
||||
}
|
||||
if (step->supertype_symbol) {
|
||||
bool has_supertype = false;
|
||||
for (unsigned j = 0; j < supertype_count; j++) {
|
||||
if (supertypes[j] == step->supertype_symbol) {
|
||||
has_supertype = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!has_supertype) node_does_match = false;
|
||||
}
|
||||
if (step->field) {
|
||||
if (step->field == field_id) {
|
||||
if (!can_have_later_siblings_with_this_field) {
|
||||
|
|
|
|||
|
|
@ -246,14 +246,19 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
|
|||
|
||||
// Private - Get various facts about the current node that are needed
|
||||
// when executing tree queries.
|
||||
TSFieldId ts_tree_cursor_current_status(
|
||||
void ts_tree_cursor_current_status(
|
||||
const TSTreeCursor *_self,
|
||||
TSFieldId *field_id,
|
||||
bool *has_later_siblings,
|
||||
bool *has_later_named_siblings,
|
||||
bool *can_have_later_siblings_with_this_field
|
||||
bool *can_have_later_siblings_with_this_field,
|
||||
TSSymbol *supertypes,
|
||||
unsigned *supertype_count
|
||||
) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
TSFieldId result = 0;
|
||||
unsigned max_supertypes = *supertype_count;
|
||||
*field_id = 0;
|
||||
*supertype_count = 0;
|
||||
*has_later_siblings = false;
|
||||
*has_later_named_siblings = false;
|
||||
*can_have_later_siblings_with_this_field = false;
|
||||
|
|
@ -269,24 +274,31 @@ TSFieldId ts_tree_cursor_current_status(
|
|||
parent_entry->subtree->ptr->production_id
|
||||
);
|
||||
|
||||
// If the subtree is visible, return its public-facing symbol.
|
||||
// Otherwise, return zero.
|
||||
#define subtree_visible_symbol(subtree, structural_child_index) \
|
||||
(( \
|
||||
!ts_subtree_extra(subtree) && \
|
||||
alias_sequence && \
|
||||
alias_sequence[structural_child_index] \
|
||||
) ? \
|
||||
alias_sequence[structural_child_index] : \
|
||||
ts_subtree_visible(subtree) ? \
|
||||
ts_subtree_symbol(subtree) : \
|
||||
0) \
|
||||
#define subtree_symbol(subtree, structural_child_index) \
|
||||
(( \
|
||||
!ts_subtree_extra(subtree) && \
|
||||
alias_sequence && \
|
||||
alias_sequence[structural_child_index] \
|
||||
) ? \
|
||||
alias_sequence[structural_child_index] : \
|
||||
ts_subtree_symbol(subtree))
|
||||
|
||||
// Stop walking up when a visible ancestor is found.
|
||||
if (
|
||||
i != self->stack.size - 1 &&
|
||||
subtree_visible_symbol(*entry->subtree, entry->structural_child_index)
|
||||
) break;
|
||||
TSSymbol entry_symbol = subtree_symbol(
|
||||
*entry->subtree,
|
||||
entry->structural_child_index
|
||||
);
|
||||
TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
|
||||
self->tree->language,
|
||||
entry_symbol
|
||||
);
|
||||
if (i != self->stack.size - 1 && entry_metadata.visible) break;
|
||||
|
||||
// Record any supertypes
|
||||
if (entry_metadata.supertype && *supertype_count < max_supertypes) {
|
||||
supertypes[*supertype_count] = entry_symbol;
|
||||
(*supertype_count)++;
|
||||
}
|
||||
|
||||
// Determine if the current node has later siblings.
|
||||
if (!*has_later_siblings) {
|
||||
|
|
@ -295,19 +307,21 @@ TSFieldId ts_tree_cursor_current_status(
|
|||
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
|
||||
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
|
||||
Subtree sibling = parent_entry->subtree->ptr->children[j];
|
||||
if (ts_subtree_visible_child_count(sibling) > 0) {
|
||||
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
|
||||
self->tree->language,
|
||||
subtree_symbol(sibling, structural_child_index)
|
||||
);
|
||||
if (sibling_metadata.visible) {
|
||||
*has_later_siblings = true;
|
||||
if (*has_later_named_siblings) break;
|
||||
if (sibling.ptr->named_child_count > 0) {
|
||||
if (sibling_metadata.named) {
|
||||
*has_later_named_siblings = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
TSSymbol visible_symbol = subtree_visible_symbol(sibling, structural_child_index);
|
||||
if (visible_symbol) {
|
||||
} else if (ts_subtree_visible_child_count(sibling) > 0) {
|
||||
*has_later_siblings = true;
|
||||
if (*has_later_named_siblings) break;
|
||||
if (ts_language_symbol_metadata(self->tree->language, visible_symbol).named) {
|
||||
if (sibling.ptr->named_child_count > 0) {
|
||||
*has_later_named_siblings = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -316,7 +330,7 @@ TSFieldId ts_tree_cursor_current_status(
|
|||
}
|
||||
}
|
||||
|
||||
#undef subtree_visible_symbol
|
||||
#undef subtree_metadata
|
||||
|
||||
if (!ts_subtree_extra(*entry->subtree)) {
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
|
|
@ -327,10 +341,10 @@ TSFieldId ts_tree_cursor_current_status(
|
|||
);
|
||||
|
||||
// Look for a field name associated with the current node.
|
||||
if (!result) {
|
||||
if (!*field_id) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (!i->inherited && i->child_index == entry->structural_child_index) {
|
||||
result = i->field_id;
|
||||
*field_id = i->field_id;
|
||||
*can_have_later_siblings_with_this_field = false;
|
||||
break;
|
||||
}
|
||||
|
|
@ -338,9 +352,9 @@ TSFieldId ts_tree_cursor_current_status(
|
|||
}
|
||||
|
||||
// Determine if the current node can have later siblings with the same field name.
|
||||
if (result) {
|
||||
if (*field_id) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (i->field_id == result && i->child_index > entry->structural_child_index) {
|
||||
if (i->field_id == *field_id && i->child_index > entry->structural_child_index) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -348,8 +362,6 @@ TSFieldId ts_tree_cursor_current_status(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,14 @@ typedef struct {
|
|||
} TreeCursor;
|
||||
|
||||
void ts_tree_cursor_init(TreeCursor *, TSNode);
|
||||
TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *, bool *);
|
||||
void ts_tree_cursor_current_status(
|
||||
const TSTreeCursor *,
|
||||
TSFieldId *,
|
||||
bool *,
|
||||
bool *,
|
||||
bool *,
|
||||
TSSymbol *,
|
||||
unsigned *
|
||||
);
|
||||
|
||||
#endif // TREE_SITTER_TREE_CURSOR_H_
|
||||
|
|
|
|||
18
test/fixtures/error_corpus/c_errors.txt
vendored
18
test/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -158,9 +158,17 @@ int a() {
|
|||
(translation_unit
|
||||
(function_definition
|
||||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(function_declarator
|
||||
(identifier)
|
||||
(parameter_list))
|
||||
(compound_statement
|
||||
(struct_specifier (type_identifier))
|
||||
(ERROR (number_literal))
|
||||
(primitive_type)
|
||||
(ERROR (number_literal)))))
|
||||
(declaration
|
||||
(struct_specifier (type_identifier))
|
||||
(init_declarator
|
||||
(MISSING identifier)
|
||||
(number_literal)))
|
||||
(declaration
|
||||
(primitive_type)
|
||||
(init_declarator
|
||||
(MISSING identifier)
|
||||
(number_literal))))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue