Merge pull request #965 from tree-sitter/query-fixes
Fix problems found in investigating dropped query matches
This commit is contained in:
commit
5a964b1b70
10 changed files with 210 additions and 31 deletions
|
|
@ -100,6 +100,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
variable_index: extra_non_terminal.index as u32,
|
||||
production,
|
||||
step_index: 1,
|
||||
has_preceding_inherited_fields: false,
|
||||
},
|
||||
&[Symbol::end_of_nonterminal_extra()]
|
||||
.iter()
|
||||
|
|
@ -197,16 +198,33 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
// next symbol. Advance the item to its next step and insert the resulting
|
||||
// item into the successor item set.
|
||||
if let Some(next_symbol) = item.symbol() {
|
||||
let successor = item.successor();
|
||||
let mut successor = item.successor();
|
||||
if next_symbol.is_non_terminal() {
|
||||
let variable = &self.syntax_grammar.variables[next_symbol.index];
|
||||
|
||||
// Keep track of where auxiliary non-terminals (repeat symbols) are
|
||||
// used within visible symbols. This information may be needed later
|
||||
// for conflict resolution.
|
||||
if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
|
||||
if variable.is_auxiliary() {
|
||||
preceding_auxiliary_symbols
|
||||
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
|
||||
}
|
||||
|
||||
// For most parse items, the symbols associated with the preceding children
|
||||
// don't matter: they have no effect on the REDUCE action that would be
|
||||
// performed at the end of the item. But the symbols *do* matter for
|
||||
// children that are hidden and have fields, because those fields are
|
||||
// "inherited" by the parent node.
|
||||
//
|
||||
// If this item has consumed a hidden child with fields, then the symbols
|
||||
// of its preceding children need to be taken into account when comparing
|
||||
// it with other items.
|
||||
if variable.is_hidden()
|
||||
&& !self.variable_info[next_symbol.index].fields.is_empty()
|
||||
{
|
||||
successor.has_preceding_inherited_fields = true;
|
||||
}
|
||||
|
||||
non_terminal_successors
|
||||
.entry(next_symbol)
|
||||
.or_insert_with(|| ParseItemSet::default())
|
||||
|
|
|
|||
|
|
@ -22,18 +22,42 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
/// A ParseItem represents an in-progress match of a single production in a grammar.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct ParseItem<'a> {
|
||||
/// The index of the parent rule within the grammar.
|
||||
pub variable_index: u32,
|
||||
/// The number of symbols that have already been matched.
|
||||
pub step_index: u32,
|
||||
/// The production being matched.
|
||||
pub production: &'a Production,
|
||||
/// A boolean indicating whether any of the already-matched children were
|
||||
/// hidden nodes and had fields. Ordinarily, a parse item's behavior is not
|
||||
/// affected by the symbols of its preceding children; it only needs to
|
||||
/// keep track of their fields and aliases.
|
||||
///
|
||||
/// Take for example these two items:
|
||||
/// X -> a b • c
|
||||
/// X -> a g • c
|
||||
///
|
||||
/// They can be considered equivalent, for the purposes of parse table
|
||||
/// generation, because they entail the same actions. But if this flag is
|
||||
/// true, then the item's set of inherited fields may depend on the specific
|
||||
/// symbols of its preceding children.
|
||||
pub has_preceding_inherited_fields: bool,
|
||||
}
|
||||
|
||||
/// A ParseItemSet represents a set of in-progress matches of productions in a
|
||||
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
|
||||
/// are allowed to *follow* the in-progress rule. This object corresponds directly
|
||||
/// to a state in the final parse table.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSet<'a> {
|
||||
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
|
||||
}
|
||||
|
||||
/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead
|
||||
/// information. Parse states with the same core are candidates for merging.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSetCore<'a> {
|
||||
pub entries: Vec<ParseItem<'a>>,
|
||||
|
|
@ -64,6 +88,7 @@ impl<'a> ParseItem<'a> {
|
|||
variable_index: u32::MAX,
|
||||
production: &START_PRODUCTION,
|
||||
step_index: 0,
|
||||
has_preceding_inherited_fields: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -100,13 +125,23 @@ impl<'a> ParseItem<'a> {
|
|||
self.variable_index == u32::MAX
|
||||
}
|
||||
|
||||
/// Create an item like this one, but advanced by one step.
|
||||
pub fn successor(&self) -> ParseItem<'a> {
|
||||
ParseItem {
|
||||
variable_index: self.variable_index,
|
||||
production: self.production,
|
||||
step_index: self.step_index + 1,
|
||||
has_preceding_inherited_fields: self.has_preceding_inherited_fields,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an item identical to this one, but with a different production.
|
||||
/// This is used when dynamically "inlining" certain symbols in a production.
|
||||
pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
|
||||
let mut result = self.clone();
|
||||
result.production = production;
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ParseItemSet<'a> {
|
||||
|
|
@ -258,19 +293,18 @@ impl<'a> Hash for ParseItem<'a> {
|
|||
self.precedence().hash(hasher);
|
||||
self.associativity().hash(hasher);
|
||||
|
||||
// When comparing two parse items, the symbols that were already consumed by
|
||||
// both items don't matter. Take for example these two items:
|
||||
// X -> a b • c
|
||||
// X -> a d • c
|
||||
// These two items can be considered equivalent, for the purposes of parse
|
||||
// table generation, because they entail the same actions. However, if the
|
||||
// productions have different aliases or field names, they *cannot* be
|
||||
// treated as equivalent, because those details are ultimately stored as
|
||||
// attributes of the `REDUCE` action that will be performed when the item
|
||||
// is finished.
|
||||
// The already-matched children don't play any role in the parse state for
|
||||
// this item, unless any of the following are true:
|
||||
// * the children have fields
|
||||
// * the children have aliases
|
||||
// * the children are hidden and have fields
|
||||
// See the docs for `has_preceding_inherited_fields`.
|
||||
for step in &self.production.steps[0..self.step_index as usize] {
|
||||
step.alias.hash(hasher);
|
||||
step.field_name.hash(hasher);
|
||||
if self.has_preceding_inherited_fields {
|
||||
step.symbol.hash(hasher);
|
||||
}
|
||||
}
|
||||
for step in &self.production.steps[self.step_index as usize..] {
|
||||
step.hash(hasher);
|
||||
|
|
@ -286,6 +320,7 @@ impl<'a> PartialEq for ParseItem<'a> {
|
|||
|| self.production.steps.len() != other.production.steps.len()
|
||||
|| self.precedence() != other.precedence()
|
||||
|| self.associativity() != other.associativity()
|
||||
|| self.has_preceding_inherited_fields != other.has_preceding_inherited_fields
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -300,6 +335,11 @@ impl<'a> PartialEq for ParseItem<'a> {
|
|||
if step.field_name != other.production.steps[i].field_name {
|
||||
return false;
|
||||
}
|
||||
if self.has_preceding_inherited_fields
|
||||
&& step.symbol != other.production.steps[i].symbol
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} else if *step != other.production.steps[i] {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -204,6 +204,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
variable_index,
|
||||
production,
|
||||
step_index: 0,
|
||||
has_preceding_inherited_fields: false,
|
||||
};
|
||||
|
||||
if let Some(inlined_productions) =
|
||||
|
|
@ -213,11 +214,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
find_or_push(
|
||||
additions_for_non_terminal,
|
||||
TransitiveClosureAddition {
|
||||
item: ParseItem {
|
||||
variable_index,
|
||||
production,
|
||||
step_index: item.step_index,
|
||||
},
|
||||
item: item.substitute_production(production),
|
||||
info: follow_set_info.clone(),
|
||||
},
|
||||
);
|
||||
|
|
@ -248,11 +245,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
for production in productions {
|
||||
self.add_item(
|
||||
&mut result,
|
||||
ParseItem {
|
||||
variable_index: item.variable_index,
|
||||
production,
|
||||
step_index: item.step_index,
|
||||
},
|
||||
item.substitute_production(production),
|
||||
lookaheads,
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -238,6 +238,10 @@ impl SyntaxVariable {
|
|||
pub fn is_auxiliary(&self) -> bool {
|
||||
self.kind == VariableType::Auxiliary
|
||||
}
|
||||
|
||||
pub fn is_hidden(&self) -> bool {
|
||||
self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary
|
||||
}
|
||||
}
|
||||
|
||||
impl InlinedProductionMap {
|
||||
|
|
|
|||
|
|
@ -1425,6 +1425,7 @@ fn test_query_matches_with_too_many_permutations_to_track() {
|
|||
collect_matches(matches, &query, source.as_str())[0],
|
||||
(0, vec![("pre", "hello"), ("post", "hello")]),
|
||||
);
|
||||
assert_eq!(cursor.did_exceed_match_limit(), true);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -1462,6 +1463,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() {
|
|||
collect_matches(matches, &query, source.as_str()),
|
||||
vec![(1, vec![("method", "b")]); 50],
|
||||
);
|
||||
assert_eq!(cursor.did_exceed_match_limit(), true);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -1850,7 +1852,7 @@ fn test_query_matches_with_repeated_fields() {
|
|||
"
|
||||
struct S {
|
||||
int a, b, c;
|
||||
}
|
||||
};
|
||||
",
|
||||
&[
|
||||
(0, vec![("field", "a")]),
|
||||
|
|
@ -1861,6 +1863,94 @@ fn test_query_matches_with_repeated_fields() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_deeply_nested_patterns_with_fields() {
|
||||
allocations::record(|| {
|
||||
let language = get_language("python");
|
||||
let query = Query::new(
|
||||
language,
|
||||
"
|
||||
(call
|
||||
function: (_) @func
|
||||
arguments: (_) @args)
|
||||
(call
|
||||
function: (attribute
|
||||
object: (_) @receiver
|
||||
attribute: (identifier) @method)
|
||||
arguments: (argument_list))
|
||||
|
||||
; These don't match anything, but they require additional
|
||||
; states to keep track of their captures.
|
||||
(call
|
||||
function: (_) @fn
|
||||
arguments: (argument_list
|
||||
(keyword_argument
|
||||
name: (identifier) @name
|
||||
value: (_) @val) @arg) @args) @call
|
||||
(call
|
||||
function: (identifier) @fn
|
||||
(#eq? @fn \"super\")) @super_call
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_query_matches(
|
||||
language,
|
||||
&query,
|
||||
"
|
||||
a(1).b(2).c(3).d(4).e(5).f(6).g(7).h(8)
|
||||
",
|
||||
&[
|
||||
(0, vec![("func", "a"), ("args", "(1)")]),
|
||||
(0, vec![("func", "a(1).b"), ("args", "(2)")]),
|
||||
(1, vec![("receiver", "a(1)"), ("method", "b")]),
|
||||
(0, vec![("func", "a(1).b(2).c"), ("args", "(3)")]),
|
||||
(1, vec![("receiver", "a(1).b(2)"), ("method", "c")]),
|
||||
(0, vec![("func", "a(1).b(2).c(3).d"), ("args", "(4)")]),
|
||||
(1, vec![("receiver", "a(1).b(2).c(3)"), ("method", "d")]),
|
||||
(0, vec![("func", "a(1).b(2).c(3).d(4).e"), ("args", "(5)")]),
|
||||
(
|
||||
1,
|
||||
vec![("receiver", "a(1).b(2).c(3).d(4)"), ("method", "e")],
|
||||
),
|
||||
(
|
||||
0,
|
||||
vec![("func", "a(1).b(2).c(3).d(4).e(5).f"), ("args", "(6)")],
|
||||
),
|
||||
(
|
||||
1,
|
||||
vec![("receiver", "a(1).b(2).c(3).d(4).e(5)"), ("method", "f")],
|
||||
),
|
||||
(
|
||||
0,
|
||||
vec![("func", "a(1).b(2).c(3).d(4).e(5).f(6).g"), ("args", "(7)")],
|
||||
),
|
||||
(
|
||||
1,
|
||||
vec![
|
||||
("receiver", "a(1).b(2).c(3).d(4).e(5).f(6)"),
|
||||
("method", "g"),
|
||||
],
|
||||
),
|
||||
(
|
||||
0,
|
||||
vec![
|
||||
("func", "a(1).b(2).c(3).d(4).e(5).f(6).g(7).h"),
|
||||
("args", "(8)"),
|
||||
],
|
||||
),
|
||||
(
|
||||
1,
|
||||
vec![
|
||||
("receiver", "a(1).b(2).c(3).d(4).e(5).f(6).g(7)"),
|
||||
("method", "h"),
|
||||
],
|
||||
),
|
||||
],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_matches_with_indefinite_step_containing_no_captures() {
|
||||
allocations::record(|| {
|
||||
|
|
@ -3031,6 +3121,7 @@ fn assert_query_matches(
|
|||
let mut cursor = QueryCursor::new();
|
||||
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
|
||||
assert_eq!(collect_matches(matches, &query, source), expected);
|
||||
assert_eq!(cursor.did_exceed_match_limit(), false);
|
||||
}
|
||||
|
||||
fn collect_matches<'a>(
|
||||
|
|
|
|||
|
|
@ -720,6 +720,16 @@ extern "C" {
|
|||
#[doc = " Start running a given query on a given node."]
|
||||
pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode);
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Check if this cursor has exceeded its maximum number of in-progress"]
|
||||
#[doc = " matches."]
|
||||
#[doc = ""]
|
||||
#[doc = " Currently, query cursors have a fixed capacity for storing lists"]
|
||||
#[doc = " of in-progress captures. If this capacity is exceeded, then the"]
|
||||
#[doc = " earliest-starting match will silently be dropped to make room for"]
|
||||
#[doc = " further matches."]
|
||||
pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Set the range of bytes or (row, column) positions in which the query"]
|
||||
#[doc = " will be executed."]
|
||||
|
|
|
|||
|
|
@ -1595,6 +1595,12 @@ impl QueryCursor {
|
|||
QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) })
|
||||
}
|
||||
|
||||
/// Check if, on its last execution, this cursor exceeded its maximum number of
|
||||
/// in-progress matches.
|
||||
pub fn did_exceed_match_limit(&self) -> bool {
|
||||
unsafe { ffi::ts_query_cursor_did_exceed_match_limit(self.0.as_ptr()) }
|
||||
}
|
||||
|
||||
/// Iterate over all of the matches in the order that they were found.
|
||||
///
|
||||
/// Each match contains the index of the pattern that matched, and a list of captures.
|
||||
|
|
|
|||
|
|
@ -791,6 +791,17 @@ void ts_query_cursor_delete(TSQueryCursor *);
|
|||
*/
|
||||
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
|
||||
|
||||
/**
|
||||
* Check if this cursor has exceeded its maximum number of in-progress
|
||||
* matches.
|
||||
*
|
||||
* Currently, query cursors have a fixed capacity for storing lists
|
||||
* of in-progress captures. If this capacity is exceeded, then the
|
||||
* earliest-starting match will silently be dropped to make room for
|
||||
* further matches.
|
||||
*/
|
||||
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *);
|
||||
|
||||
/**
|
||||
* Set the range of bytes or (row, column) positions in which the query
|
||||
* will be executed.
|
||||
|
|
|
|||
|
|
@ -236,6 +236,7 @@ struct TSQueryCursor {
|
|||
TSPoint end_point;
|
||||
bool ascending;
|
||||
bool halted;
|
||||
bool did_exceed_match_limit;
|
||||
};
|
||||
|
||||
static const TSQueryError PARENT_DONE = -1;
|
||||
|
|
@ -2103,6 +2104,7 @@ void ts_query_disable_pattern(
|
|||
TSQueryCursor *ts_query_cursor_new(void) {
|
||||
TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
|
||||
*self = (TSQueryCursor) {
|
||||
.did_exceed_match_limit = false,
|
||||
.ascending = false,
|
||||
.halted = false,
|
||||
.states = array_new(),
|
||||
|
|
@ -2126,6 +2128,10 @@ void ts_query_cursor_delete(TSQueryCursor *self) {
|
|||
ts_free(self);
|
||||
}
|
||||
|
||||
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
|
||||
return self->did_exceed_match_limit;
|
||||
}
|
||||
|
||||
void ts_query_cursor_exec(
|
||||
TSQueryCursor *self,
|
||||
const TSQuery *query,
|
||||
|
|
@ -2140,6 +2146,7 @@ void ts_query_cursor_exec(
|
|||
self->ascending = false;
|
||||
self->halted = false;
|
||||
self->query = query;
|
||||
self->did_exceed_match_limit = false;
|
||||
}
|
||||
|
||||
void ts_query_cursor_set_byte_range(
|
||||
|
|
@ -2359,6 +2366,7 @@ static CaptureList *ts_query_cursor__prepare_to_capture(
|
|||
// state has captured the earliest node in the document, and steal its
|
||||
// capture list.
|
||||
if (state->capture_list_id == NONE) {
|
||||
self->did_exceed_match_limit = true;
|
||||
uint32_t state_index, byte_offset, pattern_index;
|
||||
if (
|
||||
ts_query_cursor__first_in_progress_capture(
|
||||
|
|
|
|||
|
|
@ -353,14 +353,12 @@ void ts_tree_cursor_current_status(
|
|||
// Determine if the current node can have later siblings with the same field name.
|
||||
if (*field_id) {
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (i->field_id == *field_id) {
|
||||
if (
|
||||
i->child_index > entry->structural_child_index ||
|
||||
(i->child_index == entry->structural_child_index && *has_later_named_siblings)
|
||||
) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
}
|
||||
if (
|
||||
i->field_id == *field_id &&
|
||||
i->child_index > entry->structural_child_index
|
||||
) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue