Improve capture quantifier computation
Compute quantifiers in a bottom-up manner, which allows more precise results for alternations, where the quantifiers are now precisly joined.
This commit is contained in:
parent
9bac066330
commit
1f1a449c76
5 changed files with 506 additions and 78 deletions
|
|
@ -7,8 +7,8 @@ use lazy_static::lazy_static;
|
|||
use rand::{prelude::StdRng, SeedableRng};
|
||||
use std::{env, fmt::Write};
|
||||
use tree_sitter::{
|
||||
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind,
|
||||
QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty,
|
||||
CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError,
|
||||
QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
|
|
@ -3818,6 +3818,197 @@ fn test_query_is_pattern_guaranteed_at_step() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_capture_quantifiers() {
|
||||
struct Row {
|
||||
description: &'static str,
|
||||
language: Language,
|
||||
pattern: &'static str,
|
||||
capture_quantifiers: &'static [(&'static str, CaptureQuantifier)],
|
||||
}
|
||||
|
||||
let rows = &[
|
||||
// Simple quantifiers
|
||||
Row {
|
||||
description: "Top level capture",
|
||||
language: get_language("python"),
|
||||
pattern: r#"
|
||||
(module) @mod
|
||||
"#,
|
||||
capture_quantifiers: &[("mod", CaptureQuantifier::One)],
|
||||
},
|
||||
Row {
|
||||
description: "Nested list capture capture",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(array (_)* @elems) @array
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("array", CaptureQuantifier::One),
|
||||
("elems", CaptureQuantifier::ZeroOrMore),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "Nested non-empty list capture capture",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(array (_)+ @elems) @array
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("array", CaptureQuantifier::One),
|
||||
("elems", CaptureQuantifier::OneOrMore),
|
||||
],
|
||||
},
|
||||
// Nested quantifiers
|
||||
Row {
|
||||
description: "capture nested in optional pattern",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(array (call_expression (arguments (_) @arg))? @call) @array
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("array", CaptureQuantifier::One),
|
||||
("call", CaptureQuantifier::ZeroOrOne),
|
||||
("arg", CaptureQuantifier::ZeroOrOne),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "optional capture nested in non-empty list pattern",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(array (call_expression (arguments (_)? @arg))+ @call) @array
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("array", CaptureQuantifier::One),
|
||||
("call", CaptureQuantifier::OneOrMore),
|
||||
("arg", CaptureQuantifier::ZeroOrMore),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "non-empty list capture nested in optional pattern",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(array (call_expression (arguments (_)+ @args))? @call) @array
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("array", CaptureQuantifier::One),
|
||||
("call", CaptureQuantifier::ZeroOrOne),
|
||||
("args", CaptureQuantifier::ZeroOrMore),
|
||||
],
|
||||
},
|
||||
// Quantifiers in alternations
|
||||
Row {
|
||||
description: "capture is the same in all alternatives",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"[
|
||||
(function_declaration name:(identifier) @name)
|
||||
(call_expression function:(identifier) @name)
|
||||
]"#,
|
||||
capture_quantifiers: &[("name", CaptureQuantifier::One)],
|
||||
},
|
||||
Row {
|
||||
description: "capture appears in some alternatives",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"[
|
||||
(function_declaration name:(identifier) @name)
|
||||
(function)
|
||||
] @fun"#,
|
||||
capture_quantifiers: &[
|
||||
("fun", CaptureQuantifier::One),
|
||||
("name", CaptureQuantifier::ZeroOrOne),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "capture has different quantifiers in alternatives",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"[
|
||||
(call_expression arguments:(arguments (_)+ @args))
|
||||
(new_expression arguments:(arguments (_)? @args))
|
||||
] @call"#,
|
||||
capture_quantifiers: &[
|
||||
("call", CaptureQuantifier::One),
|
||||
("args", CaptureQuantifier::ZeroOrMore),
|
||||
],
|
||||
},
|
||||
// Quantifiers in siblings
|
||||
Row {
|
||||
description: "siblings have different captures with different quantifiers",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(call_expression (arguments (identifier)? @self (_)* @args)) @call
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("call", CaptureQuantifier::One),
|
||||
("self", CaptureQuantifier::ZeroOrOne),
|
||||
("args", CaptureQuantifier::ZeroOrMore),
|
||||
],
|
||||
},
|
||||
Row {
|
||||
description: "siblings have same capture with different quantifiers",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(call_expression (arguments (identifier) @args (_)* @args)) @call
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("call", CaptureQuantifier::One),
|
||||
("args", CaptureQuantifier::OneOrMore),
|
||||
],
|
||||
},
|
||||
// Combined nesting,
|
||||
Row {
|
||||
description: "combined nesting, alterantives, and siblings",
|
||||
language: get_language("javascript"),
|
||||
pattern: r#"
|
||||
(array
|
||||
(call_expression
|
||||
(arguments [
|
||||
(identifier) @self
|
||||
(_)+ @args
|
||||
])
|
||||
)+ @call
|
||||
) @array
|
||||
"#,
|
||||
capture_quantifiers: &[
|
||||
("array", CaptureQuantifier::One),
|
||||
("call", CaptureQuantifier::OneOrMore),
|
||||
("self", CaptureQuantifier::ZeroOrMore),
|
||||
("args", CaptureQuantifier::ZeroOrMore),
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
allocations::record(|| {
|
||||
eprintln!("");
|
||||
|
||||
for row in rows.iter() {
|
||||
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
|
||||
if !row.description.contains(filter.as_str()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
eprintln!(" query example: {:?}", row.description);
|
||||
let query = Query::new(row.language, row.pattern).unwrap();
|
||||
for (capture, expected_quantifier) in row.capture_quantifiers {
|
||||
let index = query.capture_index_for_name(capture).unwrap();
|
||||
let actual_quantifier = query.capture_quantifiers()[index as usize];
|
||||
assert_eq!(
|
||||
actual_quantifier,
|
||||
*expected_quantifier,
|
||||
"Description: {}, Pattern: {:?}, expected quantifier of @{} to be {:?} instead of {:?}",
|
||||
row.description,
|
||||
row.pattern
|
||||
.split_ascii_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" "),
|
||||
capture,
|
||||
*expected_quantifier,
|
||||
actual_quantifier,
|
||||
)
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn assert_query_matches(
|
||||
language: Language,
|
||||
query: &Query,
|
||||
|
|
|
|||
|
|
@ -107,10 +107,11 @@ pub struct TSQueryCapture {
|
|||
pub node: TSNode,
|
||||
pub index: u32,
|
||||
}
|
||||
pub const TSQuantifier_One: TSQuantifier = 0;
|
||||
pub const TSQuantifier_OneOrMore: TSQuantifier = 1;
|
||||
pub const TSQuantifier_ZeroOrOne: TSQuantifier = 2;
|
||||
pub const TSQuantifier_ZeroOrMore: TSQuantifier = 3;
|
||||
pub const TSQuantifier_Zero: TSQuantifier = 0;
|
||||
pub const TSQuantifier_ZeroOrOne: TSQuantifier = 1;
|
||||
pub const TSQuantifier_ZeroOrMore: TSQuantifier = 2;
|
||||
pub const TSQuantifier_One: TSQuantifier = 3;
|
||||
pub const TSQuantifier_OneOrMore: TSQuantifier = 4;
|
||||
pub type TSQuantifier = u32;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
|
|||
pub struct Query {
|
||||
ptr: NonNull<ffi::TSQuery>,
|
||||
capture_names: Vec<String>,
|
||||
capture_quantifiers: Vec<Quantifier>,
|
||||
capture_quantifiers: Vec<CaptureQuantifier>,
|
||||
text_predicates: Vec<Box<[TextPredicate]>>,
|
||||
property_settings: Vec<Box<[QueryProperty]>>,
|
||||
property_predicates: Vec<Box<[(QueryProperty, bool)]>>,
|
||||
|
|
@ -107,20 +107,20 @@ pub struct Query {
|
|||
|
||||
/// A quantifier for captures
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum Quantifier {
|
||||
pub enum CaptureQuantifier {
|
||||
One,
|
||||
OneOrMore,
|
||||
ZeroOrOne,
|
||||
ZeroOrMore,
|
||||
}
|
||||
|
||||
impl From<ffi::TSQuantifier> for Quantifier {
|
||||
impl From<ffi::TSQuantifier> for CaptureQuantifier {
|
||||
fn from(value: ffi::TSQuantifier) -> Self {
|
||||
match value {
|
||||
ffi::TSQuantifier_One => Quantifier::One,
|
||||
ffi::TSQuantifier_OneOrMore => Quantifier::OneOrMore,
|
||||
ffi::TSQuantifier_ZeroOrOne => Quantifier::ZeroOrOne,
|
||||
ffi::TSQuantifier_ZeroOrMore => Quantifier::ZeroOrMore,
|
||||
ffi::TSQuantifier_One => CaptureQuantifier::One,
|
||||
ffi::TSQuantifier_OneOrMore => CaptureQuantifier::OneOrMore,
|
||||
ffi::TSQuantifier_ZeroOrOne => CaptureQuantifier::ZeroOrOne,
|
||||
ffi::TSQuantifier_ZeroOrMore => CaptureQuantifier::ZeroOrMore,
|
||||
_ => panic!("Unrecognized quantifier: {}", value),
|
||||
}
|
||||
}
|
||||
|
|
@ -1550,7 +1550,7 @@ impl Query {
|
|||
}
|
||||
|
||||
/// Get the quantifiers of the captures used in the query.
|
||||
pub fn capture_quantifiers(&self) -> &[Quantifier] {
|
||||
pub fn capture_quantifiers(&self) -> &[CaptureQuantifier] {
|
||||
&self.capture_quantifiers
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -107,10 +107,11 @@ typedef struct {
|
|||
} TSQueryCapture;
|
||||
|
||||
typedef enum {
|
||||
One,
|
||||
OneOrMore,
|
||||
Zero = 0, // must match the array initialization value
|
||||
ZeroOrOne,
|
||||
ZeroOrMore,
|
||||
One,
|
||||
OneOrMore,
|
||||
} TSQuantifier;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
359
lib/src/query.c
359
lib/src/query.c
|
|
@ -118,9 +118,13 @@ typedef struct {
|
|||
typedef struct {
|
||||
Array(char) characters;
|
||||
Array(Slice) slices;
|
||||
Array(TSQuantifier) quantifiers;
|
||||
} SymbolTable;
|
||||
|
||||
/**
|
||||
* CaptureQuantififers - a data structure holding the quantifiers of pattern captures.
|
||||
*/
|
||||
typedef Array(TSQuantifier) CaptureQuantifiers;
|
||||
|
||||
/*
|
||||
* PatternEntry - Information about the starting point for matching a particular
|
||||
* pattern. These entries are stored in a 'pattern map' - a sorted array that
|
||||
|
|
@ -263,6 +267,7 @@ typedef struct {
|
|||
*/
|
||||
struct TSQuery {
|
||||
SymbolTable captures;
|
||||
CaptureQuantifiers capture_quantifiers;
|
||||
SymbolTable predicate_values;
|
||||
Array(QueryStep) steps;
|
||||
Array(PatternEntry) pattern_map;
|
||||
|
|
@ -458,31 +463,104 @@ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
|
|||
* Quantifiers
|
||||
**************/
|
||||
|
||||
static TSQuantifier quantifier_mul(
|
||||
TSQuantifier left,
|
||||
TSQuantifier right
|
||||
) {
|
||||
switch (left)
|
||||
{
|
||||
case Zero:
|
||||
return Zero;
|
||||
case One:
|
||||
return right;
|
||||
case OneOrMore:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return Zero;
|
||||
case ZeroOrOne:
|
||||
case ZeroOrMore:
|
||||
return ZeroOrMore;
|
||||
case One:
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
};
|
||||
break;
|
||||
case ZeroOrOne:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return Zero;
|
||||
case ZeroOrOne:
|
||||
case One:
|
||||
return ZeroOrOne;
|
||||
case ZeroOrMore:
|
||||
case OneOrMore:
|
||||
return ZeroOrMore;
|
||||
};
|
||||
break;
|
||||
case ZeroOrMore:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return Zero;
|
||||
case ZeroOrOne:
|
||||
case ZeroOrMore:
|
||||
case One:
|
||||
case OneOrMore:
|
||||
return ZeroOrMore;
|
||||
};
|
||||
return ZeroOrMore;
|
||||
}
|
||||
}
|
||||
|
||||
static TSQuantifier quantifier_join(
|
||||
TSQuantifier left,
|
||||
TSQuantifier right
|
||||
) {
|
||||
switch (left)
|
||||
{
|
||||
case One:
|
||||
return right;
|
||||
case OneOrMore:
|
||||
case Zero:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return Zero;
|
||||
case ZeroOrOne:
|
||||
case One:
|
||||
return ZeroOrOne;
|
||||
case ZeroOrMore:
|
||||
case OneOrMore:
|
||||
return ZeroOrMore;
|
||||
};
|
||||
break;
|
||||
case One:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
case ZeroOrOne:
|
||||
return ZeroOrOne;
|
||||
case ZeroOrMore:
|
||||
return ZeroOrMore;
|
||||
case One:
|
||||
return One;
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
};
|
||||
break;
|
||||
case OneOrMore:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
case ZeroOrOne:
|
||||
case ZeroOrMore:
|
||||
return ZeroOrMore;
|
||||
case One:
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
};
|
||||
break;
|
||||
case ZeroOrOne:
|
||||
switch (right) {
|
||||
case One:
|
||||
case Zero:
|
||||
case ZeroOrOne:
|
||||
case One:
|
||||
return ZeroOrOne;
|
||||
case OneOrMore:
|
||||
case ZeroOrMore:
|
||||
case OneOrMore:
|
||||
return ZeroOrMore;
|
||||
};
|
||||
break;
|
||||
|
|
@ -491,6 +569,148 @@ static TSQuantifier quantifier_join(
|
|||
}
|
||||
}
|
||||
|
||||
static TSQuantifier quantifier_add(
|
||||
TSQuantifier left,
|
||||
TSQuantifier right
|
||||
) {
|
||||
switch (left)
|
||||
{
|
||||
case Zero:
|
||||
return right;
|
||||
case One:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return One;
|
||||
case ZeroOrOne:
|
||||
case ZeroOrMore:
|
||||
case One:
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
};
|
||||
break;
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
case ZeroOrOne:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return ZeroOrOne;
|
||||
case ZeroOrOne:
|
||||
case ZeroOrMore:
|
||||
return ZeroOrMore;
|
||||
case One:
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
};
|
||||
break;
|
||||
case ZeroOrMore:
|
||||
switch (right) {
|
||||
case Zero:
|
||||
return ZeroOrMore;
|
||||
case ZeroOrOne:
|
||||
case ZeroOrMore:
|
||||
return ZeroOrMore;
|
||||
case One:
|
||||
case OneOrMore:
|
||||
return OneOrMore;
|
||||
};
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Create new capture quantifiers structure
|
||||
static CaptureQuantifiers capture_quantifiers_new(void) {
|
||||
return (CaptureQuantifiers) array_new();
|
||||
}
|
||||
|
||||
// Delete capture quantifiers structure
|
||||
static void capture_quantifiers_delete(
|
||||
CaptureQuantifiers *self
|
||||
) {
|
||||
array_delete(self);
|
||||
}
|
||||
|
||||
// Clear capture quantifiers structure
|
||||
static void capture_quantifiers_clear(
|
||||
CaptureQuantifiers *self
|
||||
) {
|
||||
array_clear(self);
|
||||
}
|
||||
|
||||
// Replace capture quantifiers with the given quantifiers
|
||||
static void capture_quantifiers_replace(
|
||||
CaptureQuantifiers *self,
|
||||
CaptureQuantifiers *quantifiers
|
||||
) {
|
||||
array_clear(self);
|
||||
array_push_all(self, quantifiers);
|
||||
}
|
||||
|
||||
// Return capture quantifier for the given capture id
|
||||
static TSQuantifier capture_quantifier_for_id(
|
||||
const CaptureQuantifiers *self,
|
||||
uint16_t id
|
||||
) {
|
||||
return (self->size <= id) ? Zero : *array_get(self, id);
|
||||
}
|
||||
|
||||
// Add the given quantifier to the current value for id
|
||||
static void capture_quantifiers_add_for_id(
|
||||
CaptureQuantifiers *self,
|
||||
uint16_t id,
|
||||
TSQuantifier quantifier
|
||||
) {
|
||||
if (self->size <= id) {
|
||||
array_grow_by(self, id + 1 - self->size);
|
||||
}
|
||||
TSQuantifier *own_quantifier = array_get(self, id);
|
||||
*own_quantifier = quantifier_add(*own_quantifier, quantifier);
|
||||
}
|
||||
|
||||
// Point-wise add the given quantifiers to the current values
|
||||
static void capture_quantifiers_add_all(
|
||||
CaptureQuantifiers *self,
|
||||
CaptureQuantifiers *quantifiers
|
||||
) {
|
||||
if (self->size < quantifiers->size) {
|
||||
array_grow_by(self, quantifiers->size - self->size);
|
||||
}
|
||||
for (uint16_t id = 0; id < quantifiers->size; id++) {
|
||||
TSQuantifier *quantifier = array_get(quantifiers, id);
|
||||
TSQuantifier *own_quantifier = array_get(self, id);
|
||||
*own_quantifier = quantifier_add(*own_quantifier, *quantifier);
|
||||
}
|
||||
}
|
||||
|
||||
// Join the given quantifier with the current values
|
||||
static void capture_quantifiers_mul(
|
||||
CaptureQuantifiers *self,
|
||||
TSQuantifier quantifier
|
||||
) {
|
||||
for (uint16_t id = 0; id < self->size; id++) {
|
||||
TSQuantifier *own_quantifier = array_get(self, id);
|
||||
*own_quantifier = quantifier_mul(*own_quantifier, quantifier);
|
||||
}
|
||||
}
|
||||
|
||||
// Point-wise join the quantifiers from a list of alternatives with the current values
|
||||
static void capture_quantifiers_join_all(
|
||||
CaptureQuantifiers *self,
|
||||
CaptureQuantifiers *quantifiers
|
||||
) {
|
||||
if (self->size < quantifiers->size) {
|
||||
array_grow_by(self, quantifiers->size - self->size);
|
||||
}
|
||||
for (uint32_t id = 0; id < quantifiers->size; id++) {
|
||||
TSQuantifier *quantifier = array_get(quantifiers, id);
|
||||
TSQuantifier *own_quantifier = array_get(self, id);
|
||||
*own_quantifier = quantifier_join(*own_quantifier, *quantifier);
|
||||
}
|
||||
for (uint32_t id = quantifiers->size; id < self->size; id++) {
|
||||
TSQuantifier *own_quantifier = array_get(self, id);
|
||||
*own_quantifier = quantifier_join(*own_quantifier, Zero);
|
||||
}
|
||||
}
|
||||
|
||||
/**************
|
||||
* SymbolTable
|
||||
**************/
|
||||
|
|
@ -499,14 +719,12 @@ static SymbolTable symbol_table_new(void) {
|
|||
return (SymbolTable) {
|
||||
.characters = array_new(),
|
||||
.slices = array_new(),
|
||||
.quantifiers = array_new(),
|
||||
};
|
||||
}
|
||||
|
||||
static void symbol_table_delete(SymbolTable *self) {
|
||||
array_delete(&self->characters);
|
||||
array_delete(&self->slices);
|
||||
array_delete(&self->quantifiers);
|
||||
}
|
||||
|
||||
static int symbol_table_id_for_name(
|
||||
|
|
@ -534,13 +752,6 @@ static const char *symbol_table_name_for_id(
|
|||
return &self->characters.contents[slice.offset];
|
||||
}
|
||||
|
||||
static TSQuantifier symbol_table_quantifier_for_id(
|
||||
const SymbolTable *self,
|
||||
uint16_t id
|
||||
) {
|
||||
return self->quantifiers.contents[id];
|
||||
}
|
||||
|
||||
static uint16_t symbol_table_insert_name(
|
||||
SymbolTable *self,
|
||||
const char *name,
|
||||
|
|
@ -556,22 +767,9 @@ static uint16_t symbol_table_insert_name(
|
|||
memcpy(&self->characters.contents[slice.offset], name, length);
|
||||
self->characters.contents[self->characters.size - 1] = 0;
|
||||
array_push(&self->slices, slice);
|
||||
array_push(&self->quantifiers, One);
|
||||
return self->slices.size - 1;
|
||||
}
|
||||
|
||||
static void symbol_table_quantifiers_join(
|
||||
SymbolTable *self,
|
||||
TSQuantifier quantifier,
|
||||
uint32_t start_index,
|
||||
uint32_t end_index
|
||||
) {
|
||||
for (uint32_t index = start_index; index < end_index; index++) {
|
||||
TSQuantifier *joined_quantifier = &self->quantifiers.contents[index];
|
||||
*joined_quantifier = quantifier_join(quantifier, *joined_quantifier);
|
||||
}
|
||||
}
|
||||
|
||||
/************
|
||||
* QueryStep
|
||||
************/
|
||||
|
|
@ -1740,17 +1938,20 @@ static TSQueryError ts_query__parse_predicate(
|
|||
// Read one S-expression pattern from the stream, and incorporate it into
|
||||
// the query's internal state machine representation. For nested patterns,
|
||||
// this function calls itself recursively.
|
||||
//
|
||||
// The caller is repsonsible for passing in a dedicated CaptureQuantifiers.
|
||||
// These should not be shared between different calls to ts_query__parse_pattern!
|
||||
static TSQueryError ts_query__parse_pattern(
|
||||
TSQuery *self,
|
||||
Stream *stream,
|
||||
uint32_t depth,
|
||||
bool is_immediate
|
||||
bool is_immediate,
|
||||
CaptureQuantifiers *capture_quantifiers
|
||||
) {
|
||||
if (stream->next == 0) return TSQueryErrorSyntax;
|
||||
if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
|
||||
|
||||
const uint32_t starting_step_index = self->steps.size;
|
||||
const uint32_t starting_quantifier_index = self->captures.quantifiers.size;
|
||||
|
||||
// Store the byte offset of each step in the query.
|
||||
if (
|
||||
|
|
@ -1770,13 +1971,15 @@ static TSQueryError ts_query__parse_pattern(
|
|||
|
||||
// Parse each branch, and add a placeholder step in between the branches.
|
||||
Array(uint32_t) branch_step_indices = array_new();
|
||||
CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
|
||||
for (;;) {
|
||||
uint32_t start_index = self->steps.size;
|
||||
TSQueryError e = ts_query__parse_pattern(
|
||||
self,
|
||||
stream,
|
||||
depth,
|
||||
is_immediate
|
||||
is_immediate,
|
||||
&branch_capture_quantifiers
|
||||
);
|
||||
|
||||
if (e == PARENT_DONE) {
|
||||
|
|
@ -1787,12 +1990,20 @@ static TSQueryError ts_query__parse_pattern(
|
|||
e = TSQueryErrorSyntax;
|
||||
}
|
||||
if (e) {
|
||||
capture_quantifiers_delete(&branch_capture_quantifiers);
|
||||
array_delete(&branch_step_indices);
|
||||
return e;
|
||||
}
|
||||
|
||||
if(start_index == 0) {
|
||||
capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
|
||||
} else {
|
||||
capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
|
||||
}
|
||||
|
||||
array_push(&branch_step_indices, start_index);
|
||||
array_push(&self->steps, query_step__new(0, depth, false));
|
||||
capture_quantifiers_clear(&branch_capture_quantifiers);
|
||||
}
|
||||
(void)array_pop(&self->steps);
|
||||
|
||||
|
|
@ -1808,16 +2019,7 @@ static TSQueryError ts_query__parse_pattern(
|
|||
end_step->is_dead_end = true;
|
||||
}
|
||||
|
||||
if (branch_step_indices.size > 1) {
|
||||
const uint32_t ending_quantifier_index = self->captures.quantifiers.size;
|
||||
symbol_table_quantifiers_join(
|
||||
&self->captures,
|
||||
ZeroOrOne,
|
||||
starting_quantifier_index,
|
||||
ending_quantifier_index
|
||||
);
|
||||
}
|
||||
|
||||
capture_quantifiers_delete(&branch_capture_quantifiers);
|
||||
array_delete(&branch_step_indices);
|
||||
}
|
||||
|
||||
|
|
@ -1832,6 +2034,7 @@ static TSQueryError ts_query__parse_pattern(
|
|||
// If this parenthesis is followed by a node, then it represents a grouped sequence.
|
||||
if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
|
||||
bool child_is_immediate = false;
|
||||
CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
|
||||
for (;;) {
|
||||
if (stream->next == '.') {
|
||||
child_is_immediate = true;
|
||||
|
|
@ -1842,7 +2045,8 @@ static TSQueryError ts_query__parse_pattern(
|
|||
self,
|
||||
stream,
|
||||
depth,
|
||||
child_is_immediate
|
||||
child_is_immediate,
|
||||
&child_capture_quantifiers
|
||||
);
|
||||
if (e == PARENT_DONE) {
|
||||
if (stream->next == ')') {
|
||||
|
|
@ -1851,10 +2055,17 @@ static TSQueryError ts_query__parse_pattern(
|
|||
}
|
||||
e = TSQueryErrorSyntax;
|
||||
}
|
||||
if (e) return e;
|
||||
if (e) {
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
return e;
|
||||
}
|
||||
|
||||
capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
|
||||
|
||||
child_is_immediate = false;
|
||||
capture_quantifiers_clear(&child_capture_quantifiers);
|
||||
}
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
}
|
||||
|
||||
// A dot/pound character indicates the start of a predicate.
|
||||
|
|
@ -1943,12 +2154,16 @@ static TSQueryError ts_query__parse_pattern(
|
|||
uint16_t last_child_step_index = 0;
|
||||
uint16_t negated_field_count = 0;
|
||||
TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
|
||||
CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
|
||||
for (;;) {
|
||||
// Parse a negated field assertion
|
||||
if (stream->next == '!') {
|
||||
stream_advance(stream);
|
||||
stream_skip_whitespace(stream);
|
||||
if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
|
||||
if (!stream_is_ident_start(stream)) {
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
const char *field_name = stream->input;
|
||||
stream_scan_identifier(stream);
|
||||
uint32_t length = stream->input - field_name;
|
||||
|
|
@ -1961,6 +2176,7 @@ static TSQueryError ts_query__parse_pattern(
|
|||
);
|
||||
if (!field_id) {
|
||||
stream->input = field_name;
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
return TSQueryErrorField;
|
||||
}
|
||||
|
||||
|
|
@ -1985,12 +2201,16 @@ static TSQueryError ts_query__parse_pattern(
|
|||
self,
|
||||
stream,
|
||||
depth + 1,
|
||||
child_is_immediate
|
||||
child_is_immediate,
|
||||
&child_capture_quantifiers
|
||||
);
|
||||
if (e == PARENT_DONE) {
|
||||
if (stream->next == ')') {
|
||||
if (child_is_immediate) {
|
||||
if (last_child_step_index == 0) return TSQueryErrorSyntax;
|
||||
if (last_child_step_index == 0) {
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
self->steps.contents[last_child_step_index].is_last_child = true;
|
||||
}
|
||||
|
||||
|
|
@ -2008,11 +2228,18 @@ static TSQueryError ts_query__parse_pattern(
|
|||
}
|
||||
e = TSQueryErrorSyntax;
|
||||
}
|
||||
if (e) return e;
|
||||
if (e) {
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
return e;
|
||||
}
|
||||
|
||||
capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
|
||||
|
||||
last_child_step_index = step_index;
|
||||
child_is_immediate = false;
|
||||
capture_quantifiers_clear(&child_capture_quantifiers);
|
||||
}
|
||||
capture_quantifiers_delete(&child_capture_quantifiers);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2061,14 +2288,22 @@ static TSQueryError ts_query__parse_pattern(
|
|||
stream_skip_whitespace(stream);
|
||||
|
||||
// Parse the pattern
|
||||
CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
|
||||
TSQueryError e = ts_query__parse_pattern(
|
||||
self,
|
||||
stream,
|
||||
depth,
|
||||
is_immediate
|
||||
is_immediate,
|
||||
&field_capture_quantifiers
|
||||
);
|
||||
if (e == PARENT_DONE) return TSQueryErrorSyntax;
|
||||
if (e) return e;
|
||||
if (e == PARENT_DONE) {
|
||||
capture_quantifiers_delete(&field_capture_quantifiers);
|
||||
return TSQueryErrorSyntax;
|
||||
}
|
||||
if (e) {
|
||||
capture_quantifiers_delete(&field_capture_quantifiers);
|
||||
return e;
|
||||
}
|
||||
|
||||
// Add the field name to the first step of the pattern
|
||||
TSFieldId field_id = ts_language_field_id_for_name(
|
||||
|
|
@ -2096,6 +2331,9 @@ static TSQueryError ts_query__parse_pattern(
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
|
||||
capture_quantifiers_delete(&field_capture_quantifiers);
|
||||
}
|
||||
|
||||
else {
|
||||
|
|
@ -2175,6 +2413,10 @@ static TSQueryError ts_query__parse_pattern(
|
|||
for (;;) {
|
||||
QueryStep *step = &self->steps.contents[step_index];
|
||||
query_step__add_capture(step, capture_id);
|
||||
// Add only once, not for every branch, lest the quantifier will be '+' instead of '1'
|
||||
if (step_index == starting_step_index) {
|
||||
capture_quantifiers_add_for_id(capture_quantifiers, capture_id, One);
|
||||
}
|
||||
if (
|
||||
step->alternative_index != NONE &&
|
||||
step->alternative_index > step_index &&
|
||||
|
|
@ -2194,16 +2436,7 @@ static TSQueryError ts_query__parse_pattern(
|
|||
}
|
||||
}
|
||||
|
||||
// Patch capture quantifiers
|
||||
if (quantifier != One) {
|
||||
const uint32_t ending_quantifier_index = self->captures.quantifiers.size;
|
||||
symbol_table_quantifiers_join(
|
||||
&self->captures,
|
||||
quantifier,
|
||||
starting_quantifier_index,
|
||||
ending_quantifier_index
|
||||
);
|
||||
}
|
||||
capture_quantifiers_mul(capture_quantifiers, quantifier);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -2229,6 +2462,7 @@ TSQuery *ts_query_new(
|
|||
.steps = array_new(),
|
||||
.pattern_map = array_new(),
|
||||
.captures = symbol_table_new(),
|
||||
.capture_quantifiers = capture_quantifiers_new(),
|
||||
.predicate_values = symbol_table_new(),
|
||||
.predicate_steps = array_new(),
|
||||
.patterns = array_new(),
|
||||
|
|
@ -2253,7 +2487,7 @@ TSQuery *ts_query_new(
|
|||
.predicate_steps = (Slice) {.offset = start_predicate_step_index},
|
||||
.start_byte = stream_offset(&stream),
|
||||
}));
|
||||
*error_type = ts_query__parse_pattern(self, &stream, 0, false);
|
||||
*error_type = ts_query__parse_pattern(self, &stream, 0, false, &self->capture_quantifiers);
|
||||
array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
|
||||
|
||||
QueryPattern *pattern = array_back(&self->patterns);
|
||||
|
|
@ -2344,6 +2578,7 @@ void ts_query_delete(TSQuery *self) {
|
|||
array_delete(&self->negated_fields);
|
||||
symbol_table_delete(&self->captures);
|
||||
symbol_table_delete(&self->predicate_values);
|
||||
capture_quantifiers_delete(&self->capture_quantifiers);
|
||||
ts_free(self);
|
||||
}
|
||||
}
|
||||
|
|
@ -2372,7 +2607,7 @@ TSQuantifier ts_query_capture_quantifier_for_id(
|
|||
const TSQuery *self,
|
||||
uint32_t index
|
||||
) {
|
||||
return symbol_table_quantifier_for_id(&self->captures, index);
|
||||
return capture_quantifier_for_id(&self->capture_quantifiers, index);
|
||||
}
|
||||
|
||||
const char *ts_query_string_value_for_id(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue