Mark steps as definite on query construction
* Add a ts_query_pattern_is_definite API, just for debugging this
* Store state_count on TSLanguage structs, to allow for scanning parse tables
parent 6a46dff89a
commit 4c2f36a07b
10 changed files with 755 additions and 76 deletions
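The analysis result is exposed through a single new entry point, ts_query_pattern_is_definite, mainly so it can be exercised from tests. As a rough illustration (not part of this commit), the C snippet below probes each step of the first pattern of a query. It assumes a JavaScript grammar built against the new ABI is linked in as tree_sitter_javascript(); the pattern and step indices mirror the new test_query_is_definite test further down.

#include <stdio.h>
#include <string.h>
#include <tree_sitter/api.h>

// Assumption for this sketch: an external grammar function following the
// usual tree-sitter naming convention.
const TSLanguage *tree_sitter_javascript(void);

int main(void) {
  const char *pattern = "(object \"{\" \"}\")";
  uint32_t error_offset;
  TSQueryError error_type;
  TSQuery *query = ts_query_new(
    tree_sitter_javascript(),
    pattern,
    (uint32_t)strlen(pattern),
    &error_offset,
    &error_type
  );
  if (!query) return 1;

  // Step 0 is the `object` node itself; steps 1 and 2 are the two braces.
  for (uint32_t step = 0; step < 3; step++) {
    printf("step %u is_definite: %d\n", step,
           ts_query_pattern_is_definite(query, 0, step));
  }

  ts_query_delete(query);
  return 0;
}
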
@@ -95,11 +95,7 @@ impl Generator {
self.add_stats();
self.add_symbol_enum();
self.add_symbol_names_list();

if self.next_abi {
self.add_unique_symbol_map();
}

self.add_unique_symbol_map();
self.add_symbol_metadata_list();

if !self.field_names.is_empty() {

@@ -177,20 +173,16 @@ impl Generator {
// If we are opting in to the new unstable language ABI, then use the concept of
// "small parse states". Otherwise, use the same representation for all parse
// states.
if self.next_abi {
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
self.large_state_count = self
.parse_table
.states
.iter()
.enumerate()
.take_while(|(i, s)| {
*i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
})
.count();
} else {
self.large_state_count = self.parse_table.states.len();
}
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
self.large_state_count = self
.parse_table
.states
.iter()
.enumerate()
.take_while(|(i, s)| {
*i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
})
.count();
}

fn add_includes(&mut self) {

@@ -256,10 +248,7 @@ impl Generator {
"#define STATE_COUNT {}",
self.parse_table.states.len()
);

if self.next_abi {
add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
}
add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);

add_line!(
self,

@@ -689,17 +678,12 @@ impl Generator {
name
);
indent!(self);

add_line!(self, "START_LEXER();");

if self.next_abi {
add_line!(self, "eof = lexer->eof(lexer);");
} else {
add_line!(self, "eof = lookahead == 0;");
}

add_line!(self, "eof = lexer->eof(lexer);");
add_line!(self, "switch (state) {{");
indent!(self);

indent!(self);
for (i, state) in lex_table.states.into_iter().enumerate() {
add_line!(self, "case {}:", i);
indent!(self);

@@ -714,6 +698,7 @@ impl Generator {

dedent!(self);
add_line!(self, "}}");

dedent!(self);
add_line!(self, "}}");
add_line!(self, "");

@@ -967,12 +952,7 @@ impl Generator {

add_line!(
self,
"static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{",
if self.next_abi {
"LARGE_STATE_COUNT"
} else {
"STATE_COUNT"
}
"static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{",
);
indent!(self);

@@ -1224,9 +1204,10 @@ impl Generator {
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
add_line!(self, ".alias_count = ALIAS_COUNT,");
add_line!(self, ".token_count = TOKEN_COUNT,");
add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");

if self.next_abi {
add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
add_line!(self, ".state_count = STATE_COUNT,");
}

add_line!(self, ".symbol_metadata = ts_symbol_metadata,");

@@ -1249,10 +1230,7 @@ impl Generator {
add_line!(self, ".parse_actions = ts_parse_actions,");
add_line!(self, ".lex_modes = ts_lex_modes,");
add_line!(self, ".symbol_names = ts_symbol_names,");

if self.next_abi {
add_line!(self, ".public_symbol_map = ts_symbol_map,");
}
add_line!(self, ".public_symbol_map = ts_symbol_map,");

if !self.parse_table.production_infos.is_empty() {
add_line!(

@@ -149,8 +149,14 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("path").index(1).multiple(true)),
)
.subcommand(
SubCommand::with_name("web-ui").about("Test a parser interactively in the browser")
.arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")),
SubCommand::with_name("web-ui")
.about("Test a parser interactively in the browser")
.arg(
Arg::with_name("quiet")
.long("quiet")
.short("q")
.help("open in default browser"),
),
)
.subcommand(
SubCommand::with_name("dump-languages")

@@ -1919,7 +1919,7 @@ fn test_query_start_byte_for_pattern() {
let patterns_3 = "
((identifier) @b (#match? @b i))
(function_declaration name: (identifier) @c)
(method_definition name: (identifier) @d)
(method_definition name: (property_identifier) @d)
"
.trim_start();

@@ -2048,6 +2048,76 @@ fn test_query_disable_pattern() {
});
}

#[test]
fn test_query_is_definite() {
struct Row {
pattern: &'static str,
results_by_step_index: &'static [(usize, bool)],
}

let rows = &[
Row {
pattern: r#"(object "{" "}")"#,
results_by_step_index: &[
(0, false),
(1, true), // "{"
(2, true), // "}"
],
},
Row {
pattern: r#"(pair (property_identifier) ":")"#,
results_by_step_index: &[
(0, false),
(1, false), // property_identifier
(2, true), // ":"
],
},
Row {
pattern: r#"(object "{" (_) "}")"#,
results_by_step_index: &[
(0, false),
(1, false), // "{"
(2, false), // (_)
(3, true), // "}"
],
},
Row {
// Named wildcards, fields
pattern: r#"(binary_expression left: (identifier) right: (_))"#,
results_by_step_index: &[
(0, false),
(1, false), // identifier
(2, true), // (_)
],
},
Row {
pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#,
results_by_step_index: &[
(0, false),
(1, true), // identifier
(2, true), // statement_block
],
},
];

allocations::record(|| {
let language = get_language("javascript");
for row in rows.iter() {
let query = Query::new(language, row.pattern).unwrap();
for (step_index, is_definite) in row.results_by_step_index {
assert_eq!(
query.pattern_is_definite(0, *step_index),
*is_definite,
"Pattern: {:?}, step: {}, expected is_definite to be {}",
row.pattern,
step_index,
is_definite,
)
}
}
});
}

fn assert_query_matches(
language: Language,
query: &Query,

@@ -172,9 +172,9 @@ extern "C" {
#[doc = " the given ranges must be ordered from earliest to latest in the document,"]
#[doc = " and they must not overlap. That is, the following must hold for all"]
#[doc = " `i` < `length - 1`:"]
#[doc = " ```text"]
#[doc = ""]
#[doc = " ranges[i].end_byte <= ranges[i + 1].start_byte"]
#[doc = " ```"]
#[doc = ""]
#[doc = " If this requirement is not satisfied, the operation will fail, the ranges"]
#[doc = " will not be assigned, and this function will return `false`. On success,"]
#[doc = " this function returns `true`"]

@@ -649,6 +649,13 @@ extern "C" {
length: *mut u32,
) -> *const TSQueryPredicateStep;
}
extern "C" {
pub fn ts_query_pattern_is_definite(
self_: *const TSQuery,
pattern_index: u32,
step_index: u32,
) -> bool;
}
extern "C" {
#[doc = " Get the name and length of one of the query\'s captures, or one of the"]
#[doc = " query\'s string literals. Each capture and string is associated with a"]

@@ -800,5 +807,5 @@ extern "C" {
pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
}

pub const TREE_SITTER_LANGUAGE_VERSION: usize = 11;
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 12;
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9;

@@ -1449,6 +1449,14 @@ impl Query {
unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) }
}

/// Check if a pattern will definitely match after a certain number of steps
/// have matched.
pub fn pattern_is_definite(&self, index: usize, step_index: usize) -> bool {
unsafe {
ffi::ts_query_pattern_is_definite(self.ptr.as_ptr(), index as u32, step_index as u32)
}
}

fn parse_property(
function_name: &str,
capture_names: &[String],

@@ -21,7 +21,7 @@ extern "C" {
* The Tree-sitter library is generally backwards-compatible with languages
* generated using older CLI versions, but is not forwards-compatible.
*/
#define TREE_SITTER_LANGUAGE_VERSION 11
#define TREE_SITTER_LANGUAGE_VERSION 12

/**
* The earliest ABI version that is supported by the current version of the

@@ -718,6 +718,12 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
uint32_t *length
);

bool ts_query_pattern_is_definite(
const TSQuery *self,
uint32_t pattern_index,
uint32_t step_index
);

/**
* Get the name and length of one of the query's captures, or one of the
* query's string literals. Each capture and string is associated with a

@@ -119,6 +119,7 @@ struct TSLanguage {
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSSymbol *public_symbol_map;
uint32_t state_count;
};

/*

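Storing state_count on the language struct is what lets library code walk the entire generated parse table, which the new query analysis in lib/src/query.c relies on. Below is a hedged sketch of that kind of scan (not part of this commit); it uses the private ts_language_actions helper from lib/src/language.h, and count_shift_actions is an illustrative name, not an API added here.

#include "./language.h"

// Count how many shift actions exist across the whole parse table. This scan
// is only possible now that `state_count` is recorded on TSLanguage.
static unsigned count_shift_actions(const TSLanguage *language) {
  unsigned total = 0;
  for (TSStateId state = 1; state < language->state_count; state++) {
    for (TSSymbol sym = 0; sym < language->token_count; sym++) {
      uint32_t count;
      const TSParseAction *actions = ts_language_actions(language, state, sym, &count);
      for (uint32_t i = 0; i < count; i++) {
        if (actions[i].type == TSParseActionTypeShift) total++;
      }
    }
  }
  return total;
}
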
@@ -66,6 +66,30 @@ extern "C" {
#define array_assign(self, other) \
array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))

#define array_search_sorted_by(self, start, field, needle, out_index, out_exists) \
do { \
*(out_exists) = false; \
for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \
int _comparison = (int)((self)->contents[*(out_index)] field) - (int)(needle); \
if (_comparison >= 0) { \
if (_comparison == 0) *(out_exists) = true; \
break; \
} \
} \
} while (0);

#define array_search_sorted_with(self, start, compare, needle, out_index, out_exists) \
do { \
*(out_exists) = false; \
for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \
int _comparison = compare(&(self)->contents[*(out_index)], (needle)); \
if (_comparison >= 0) { \
if (_comparison == 0) *(out_exists) = true; \
break; \
} \
} \
} while (0);

// Private

typedef Array(void) VoidArray;

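The two new search macros do a linear scan for the first element whose key is greater than or equal to the needle, reporting both the insertion position and whether an exact match exists. A small usage sketch in the same style as the calls added to lib/src/query.c follows; the Entry type and entry_array_insert_sorted helper are made up for illustration.

#include <stdbool.h>
#include "./array.h"

typedef struct {
  uint16_t symbol;
  uint32_t payload;
} Entry;

typedef Array(Entry) EntryArray;

// Keep `entries` sorted by the `.symbol` field, inserting only new keys.
static void entry_array_insert_sorted(EntryArray *entries, Entry entry) {
  unsigned index, exists;
  array_search_sorted_by(entries, 0, .symbol, entry.symbol, &index, &exists);
  if (!exists) array_insert(entries, index, entry);
}
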
@@ -12,6 +12,7 @@ extern "C" {
#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11
#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11
#define TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT 12

typedef struct {
const TSParseAction *actions;

632 lib/src/query.c

@@ -14,6 +14,8 @@
#define MAX_STATE_COUNT 256
#define MAX_CAPTURE_LIST_COUNT 32
#define MAX_STEP_CAPTURE_COUNT 3
#define MAX_STATE_PREDECESSOR_COUNT 100
#define MAX_WALK_STATE_DEPTH 4

/*
* Stream - A sequence of unicode characters derived from a UTF8 string.

@@ -55,6 +57,7 @@ typedef struct {
bool is_pass_through: 1;
bool is_dead_end: 1;
bool alternative_is_immediate: 1;
bool is_definite: 1;
} QueryStep;

/*

@@ -89,6 +92,12 @@ typedef struct {
uint16_t pattern_index;
} PatternEntry;

typedef struct {
Slice predicate_steps;
uint32_t start_byte;
uint32_t start_step;
} QueryPattern;

/*
* QueryState - The state of an in-progress match of a particular pattern
* in a query. While executing, a `TSQueryCursor` must keep track of a number

@@ -138,6 +147,31 @@ typedef struct {
uint32_t usage_map;
} CaptureListPool;

/*
* WalkState - The state needed for walking the parse table when analyzing
* a query pattern, to determine the steps where the pattern could fail
* to match.
*/
typedef struct {
TSStateId state;
TSSymbol parent_symbol;
uint16_t child_index;
TSFieldId field;
} WalkStateEntry;

typedef struct {
WalkStateEntry stack[MAX_WALK_STATE_DEPTH];
uint16_t depth;
uint16_t step_index;
} WalkState;

/*
* StatePredecessorMap - A map that stores the predecessors of each parse state.
*/
typedef struct {
TSStateId *contents;
} StatePredecessorMap;

/*
* TSQuery - A tree query, compiled from a string of S-expressions. The query
* itself is immutable. The mutable state used in the process of executing the

@@ -149,8 +183,7 @@ struct TSQuery {
Array(QueryStep) steps;
Array(PatternEntry) pattern_map;
Array(TSQueryPredicateStep) predicate_steps;
Array(Slice) predicates_by_pattern;
Array(uint32_t) start_bytes_by_pattern;
Array(QueryPattern) patterns;
const TSLanguage *language;
uint16_t wildcard_root_pattern_count;
TSSymbol *symbol_map;

@@ -451,6 +484,7 @@ static QueryStep query_step__new(
.is_pattern_start = false,
.is_pass_through = false,
.is_dead_end = false,
.is_definite = false,
.is_immediate = is_immediate,
.alternative_is_immediate = false,
};

@@ -480,6 +514,67 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
}
}

/**********************
* StatePredecessorMap
**********************/

static inline StatePredecessorMap state_predecessor_map_new(const TSLanguage *language) {
return (StatePredecessorMap) {
.contents = ts_calloc(language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId)),
};
}

static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
ts_free(self->contents);
}

static inline void state_predecessor_map_add(
StatePredecessorMap *self,
TSStateId state,
TSStateId predecessor
) {
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
TSStateId *count = &self->contents[index];
if (*count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)) {
(*count)++;
self->contents[index + *count] = predecessor;
}
}

static inline const TSStateId *state_predecessor_map_get(
const StatePredecessorMap *self,
TSStateId state,
unsigned *count
) {
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
*count = self->contents[index];
return &self->contents[index + 1];
}

/************
* WalkState
************/

static inline int walk_state__compare(WalkState *self, WalkState *other) {
if (self->depth < other->depth) return -1;
if (self->depth > other->depth) return 1;
if (self->step_index < other->step_index) return -1;
if (self->step_index > other->step_index) return 1;
for (unsigned i = 0; i < self->depth; i++) {
if (self->stack[i].state < other->stack[i].state) return -1;
if (self->stack[i].state > other->stack[i].state) return 1;
if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1;
if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1;
if (self->stack[i].child_index < other->stack[i].child_index) return -1;
if (self->stack[i].child_index > other->stack[i].child_index) return 1;
}
return 0;
}

static inline WalkStateEntry *walk_state__top(WalkState *self) {
return &self->stack[self->depth - 1];
}

/*********
* Query
*********/

@@ -552,6 +647,466 @@ static inline void ts_query__pattern_map_insert(
}));
}

static void ts_query__analyze_patterns(TSQuery *self) {
typedef struct {
TSSymbol parent_symbol;
uint32_t parent_step_index;
Array(uint32_t) child_step_indices;
} ParentPattern;

typedef struct {
TSStateId state;
uint8_t child_index;
uint8_t production_id;
bool done;
} SubgraphNode;

typedef struct {
TSSymbol symbol;
Array(TSStateId) start_states;
Array(SubgraphNode) nodes;
} SymbolSubgraph;

typedef Array(WalkState) WalkStateList;

// Identify all of the patterns in the query that have child patterns. This
// includes both top-level patterns and patterns that are nested within some
// larger pattern. For each of these, record the parent symbol, the step index
// and all of the immediate child step indices in reverse order.
Array(ParentPattern) parent_patterns = array_new();
Array(uint32_t) stack = array_new();
for (unsigned i = 0; i < self->steps.size; i++) {
QueryStep *step = &self->steps.contents[i];
if (step->depth == PATTERN_DONE_MARKER) {
array_clear(&stack);
} else {
uint32_t parent_pattern_index = 0;
while (stack.size > 0) {
parent_pattern_index = *array_back(&stack);
ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index];
QueryStep *parent_step = &self->steps.contents[parent_pattern->parent_step_index];
if (parent_step->depth >= step->depth) {
stack.size--;
} else {
break;
}
}

if (stack.size > 0) {
ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index];
step->is_definite = true;
array_push(&parent_pattern->child_step_indices, i);
}

array_push(&stack, parent_patterns.size);
array_push(&parent_patterns, ((ParentPattern) {
.parent_symbol = step->symbol,
.parent_step_index = i,
}));
}
}
for (unsigned i = 0; i < parent_patterns.size; i++) {
ParentPattern *parent_pattern = &parent_patterns.contents[i];
if (parent_pattern->child_step_indices.size == 0) {
array_erase(&parent_patterns, i);
i--;
}
}

// Debug
// {
// printf("\nParent pattern entries\n");
// for (unsigned i = 0; i < parent_patterns.size; i++) {
// ParentPattern *parent_pattern = &parent_patterns.contents[i];
// printf(" %s ->", ts_language_symbol_name(self->language, parent_pattern->parent_symbol));
// for (unsigned j = 0; j < parent_pattern->child_step_indices.size; j++) {
// QueryStep *step = &self->steps.contents[parent_pattern->child_step_indices.contents[j]];
// printf(" %s", ts_language_symbol_name(self->language, step->symbol));
// }
// printf("\n");
// }
// }

// Initialize a set of subgraphs, with one subgraph for each parent symbol,
// in the query, and one subgraph for each hidden symbol.
unsigned subgraph_index = 0, exists;
Array(SymbolSubgraph) subgraphs = array_new();
for (unsigned i = 0; i < parent_patterns.size; i++) {
TSSymbol parent_symbol = parent_patterns.contents[i].parent_symbol;
array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists);
if (!exists) {
array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = parent_symbol, }));
}
}
subgraph_index = 0;
for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) {
if (!ts_language_symbol_metadata(self->language, sym).visible) {
array_search_sorted_by(
&subgraphs, subgraph_index,
.symbol, sym,
&subgraph_index, &exists
);
if (!exists) {
array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = sym, }));
subgraph_index++;
}
}
}

// Scan the parse table to find the data needed for these subgraphs.
// Collect three things during this scan:
// 1) All of the parse states where one of these symbols can start.
// 2) All of the parse states where one of these symbols can end, along
// with information about the node that would be created.
// 3) A list of predecessor states for each state.
StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
for (TSStateId state = 1; state < self->language->state_count; state++) {
unsigned subgraph_index = 0, exists;
for (TSSymbol sym = 0; sym < self->language->token_count; sym++) {
unsigned count;
const TSParseAction *actions = ts_language_actions(self->language, state, sym, &count);
for (unsigned i = 0; i < count; i++) {
const TSParseAction *action = &actions[i];
if (action->type == TSParseActionTypeReduce) {
unsigned exists;
array_search_sorted_by(
&subgraphs,
subgraph_index,
.symbol,
action->params.reduce.symbol,
&subgraph_index,
&exists
);
if (exists) {
SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];
if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
array_push(&subgraph->nodes, ((SubgraphNode) {
.state = state,
.production_id = action->params.reduce.production_id,
.child_index = action->params.reduce.child_count,
.done = true,
}));
}
}
} else if (
action->type == TSParseActionTypeShift &&
!action->params.shift.extra
) {
TSStateId next_state = action->params.shift.state;
state_predecessor_map_add(&predecessor_map, next_state, state);
}
}
}
for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) {
TSStateId next_state = ts_language_next_state(self->language, state, sym);
if (next_state != 0) {
state_predecessor_map_add(&predecessor_map, next_state, state);
array_search_sorted_by(
&subgraphs,
subgraph_index,
.symbol,
sym,
&subgraph_index,
&exists
);
if (exists) {
SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];
array_push(&subgraph->start_states, state);
}
}
}
}

// For each subgraph, compute the remainder of the nodes by walking backward
// from the end states using the predecessor map.
Array(SubgraphNode) next_nodes = array_new();
for (unsigned i = 0; i < subgraphs.size; i++) {
SymbolSubgraph *subgraph = &subgraphs.contents[i];
if (subgraph->nodes.size == 0) {
array_delete(&subgraph->start_states);
array_erase(&subgraphs, i);
i--;
continue;
}
array_assign(&next_nodes, &subgraph->nodes);
while (next_nodes.size > 0) {
SubgraphNode node = array_pop(&next_nodes);
if (node.child_index > 1) {
unsigned predecessor_count;
const TSStateId *predecessors = state_predecessor_map_get(
&predecessor_map,
node.state,
&predecessor_count
);
for (unsigned j = 0; j < predecessor_count; j++) {
SubgraphNode predecessor_node = {
.state = predecessors[j],
.child_index = node.child_index - 1,
.production_id = node.production_id,
.done = false,
};
unsigned index, exists;
array_search_sorted_by(&subgraph->nodes, 0, .state, predecessor_node.state, &index, &exists);
if (!exists) {
array_insert(&subgraph->nodes, index, predecessor_node);
array_push(&next_nodes, predecessor_node);
}
}
}
}
}

// Debug
// {
// printf("\nSubgraphs:\n");
// for (unsigned i = 0; i < subgraphs.size; i++) {
// SymbolSubgraph *subgraph = &subgraphs.contents[i];
// printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
// for (unsigned j = 0; j < subgraph->nodes.size; j++) {
// SubgraphNode *node = &subgraph->nodes.contents[j];
// printf(" {state: %u, child_index: %u}\n", node->state, node->child_index);
// }
// printf("\n");
// }
// }

// For each non-terminal pattern, determine if the pattern can successfully match,
// and all of the possible children within the pattern where matching could fail.
WalkStateList walk_states = array_new();
WalkStateList next_walk_states = array_new();
Array(uint16_t) finished_step_indices = array_new();
for (unsigned i = 0; i < parent_patterns.size; i++) {
ParentPattern *parent_pattern = &parent_patterns.contents[i];
unsigned subgraph_index, exists;
array_search_sorted_by(&subgraphs, 0, .symbol, parent_pattern->parent_symbol, &subgraph_index, &exists);
if (!exists) {
// TODO - what to do for ERROR patterns
continue;
}
SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];

// Initialize a walk at every possible parse state where this non-terminal
// symbol can start.
array_clear(&walk_states);
for (unsigned j = 0; j < subgraph->start_states.size; j++) {
TSStateId state = subgraph->start_states.contents[j];
array_push(&walk_states, ((WalkState) {
.step_index = 0,
.stack = {
[0] = {
.state = state,
.child_index = 0,
.parent_symbol = subgraph->symbol,
.field = 0,
},
},
.depth = 1,
}));
}

// Walk the subgraph for this non-terminal, tracking all of the possible
// sequences of progress within the pattern.
array_clear(&finished_step_indices);
while (walk_states.size > 0) {
// Debug
// {
// printf("Walk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol));
// for (unsigned j = 0; j < walk_states.size; j++) {
// WalkState *walk_state = &walk_states.contents[j];
// printf(
// " %u: {depth: %u, step: %u, state: %u, child_index: %u, parent: %s}\n",
// j,
// walk_state->depth,
// walk_state->step_index,
// walk_state->stack[walk_state->depth - 1].state,
// walk_state->stack[walk_state->depth - 1].child_index,
// ts_language_symbol_name(self->language, walk_state->stack[walk_state->depth - 1].parent_symbol)
// );
// }

// printf("\nFinished step indices for %u %s:", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol));
// for (unsigned j = 0; j < finished_step_indices.size; j++) {
// printf(" %u", finished_step_indices.contents[j]);
// }
// printf("\n\n");
// }

array_clear(&next_walk_states);
for (unsigned j = 0; j < walk_states.size; j++) {
WalkState *walk_state = &walk_states.contents[j];
TSStateId state = walk_state->stack[walk_state->depth - 1].state;
unsigned child_index = walk_state->stack[walk_state->depth - 1].child_index;
TSSymbol parent_symbol = walk_state->stack[walk_state->depth - 1].parent_symbol;

unsigned subgraph_index, exists;
array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists);
if (!exists) continue;
SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index];

for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) {
TSStateId successor_state = ts_language_next_state(self->language, state, sym);
if (successor_state && successor_state != state) {
unsigned node_index;
array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists);
if (exists) {
SubgraphNode *node = &subgraph->nodes.contents[node_index];
if (node->child_index != child_index + 1) continue;

WalkState next_walk_state = *walk_state;
walk_state__top(&next_walk_state)->child_index++;
walk_state__top(&next_walk_state)->state = successor_state;

bool does_match = true;
unsigned step_index = parent_pattern->child_step_indices.contents[walk_state->step_index];
QueryStep *step = &self->steps.contents[step_index];
TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
TSSymbol visible_symbol = alias
? alias
: self->language->symbol_metadata[sym].visible
? self->language->public_symbol_map[sym]
: 0;
if (visible_symbol) {
if (step->symbol == NAMED_WILDCARD_SYMBOL) {
if (!ts_language_symbol_metadata(self->language, visible_symbol).named) does_match = false;
} else if (step->symbol != WILDCARD_SYMBOL) {
if (step->symbol != visible_symbol) does_match = false;
}
} else if (next_walk_state.depth < MAX_WALK_STATE_DEPTH) {
does_match = false;
next_walk_state.depth++;
walk_state__top(&next_walk_state)->state = state;
walk_state__top(&next_walk_state)->child_index = 0;
walk_state__top(&next_walk_state)->parent_symbol = sym;
} else {
continue;
}

TSFieldId field_id = 0;
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
for (; field_map != field_map_end; field_map++) {
if (field_map->child_index == child_index) {
field_id = field_map->field_id;
break;
}
}

if (does_match) {
next_walk_state.step_index++;
}

if (node->done) {
next_walk_state.depth--;
}

if (
next_walk_state.depth == 0 ||
next_walk_state.step_index == parent_pattern->child_step_indices.size
) {
unsigned index, exists;
array_search_sorted_by(&finished_step_indices, 0, , next_walk_state.step_index, &index, &exists);
if (!exists) array_insert(&finished_step_indices, index, next_walk_state.step_index);
continue;
}

unsigned index, exists;
array_search_sorted_with(
&next_walk_states,
0,
walk_state__compare,
&next_walk_state,
&index,
&exists
);
if (!exists) {
array_insert(&next_walk_states, index, next_walk_state);
}
}
}
}
}

WalkStateList _walk_states = walk_states;
walk_states = next_walk_states;
next_walk_states = _walk_states;
}

// Debug
// {
// printf("Finished step indices for %u %s:", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol));
// for (unsigned j = 0; j < finished_step_indices.size; j++) {
// printf(" %u", finished_step_indices.contents[j]);
// }
// printf("\n\n");
// }

// A query step is definite if the containing pattern will definitely match
// once the step is reached. In other words, a step is *not* definite if
// it's possible to create a syntax node that matches up to until that step,
// but does not match the entire pattern.
for (unsigned j = 0, n = parent_pattern->child_step_indices.size; j < n; j++) {
uint32_t step_index = parent_pattern->child_step_indices.contents[j];
for (unsigned k = 0; k < finished_step_indices.size; k++) {
uint32_t finished_step_index = finished_step_indices.contents[k];
if (finished_step_index >= j && finished_step_index < n) {
QueryStep *step = &self->steps.contents[step_index];
step->is_definite = false;
break;
}
}
}
}

// In order for a parent step to be definite, all of its child steps must
// be definite. Propagate the definiteness up the pattern trees by walking
// the query's steps in reverse.
for (unsigned i = self->steps.size - 1; i + 1 > 0; i--) {
QueryStep *step = &self->steps.contents[i];
for (unsigned j = i + 1; j < self->steps.size; j++) {
QueryStep *child_step = &self->steps.contents[j];
if (child_step->depth <= step->depth) break;
if (child_step->depth == step->depth + 1 && !child_step->is_definite) {
step->is_definite = false;
break;
}
}
}

// Debug
// {
// printf("\nSteps:\n");
// for (unsigned i = 0; i < self->steps.size; i++) {
// QueryStep *step = &self->steps.contents[i];
// if (step->depth == PATTERN_DONE_MARKER) {
// printf("\n");
// continue;
// }
// printf(
// " {symbol: %s, is_definite: %d}\n",
// (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) ? "ANY" : ts_language_symbol_name(self->language, step->symbol),
// step->is_definite
// );
// }
// }

// Cleanup
for (unsigned i = 0; i < parent_patterns.size; i++) {
array_delete(&parent_patterns.contents[i].child_step_indices);
}
for (unsigned i = 0; i < subgraphs.size; i++) {
array_delete(&subgraphs.contents[i].start_states);
array_delete(&subgraphs.contents[i].nodes);
}
array_delete(&stack);
array_delete(&subgraphs);
array_delete(&next_nodes);
array_delete(&walk_states);
array_delete(&parent_patterns);
array_delete(&next_walk_states);
array_delete(&finished_step_indices);
state_predecessor_map_delete(&predecessor_map);
}

static void ts_query__finalize_steps(TSQuery *self) {
for (unsigned i = 0; i < self->steps.size; i++) {
QueryStep *step = &self->steps.contents[i];

@@ -588,7 +1143,7 @@ static TSQueryError ts_query__parse_predicate(
predicate_name,
length
);
array_back(&self->predicates_by_pattern)->length++;
array_back(&self->patterns)->predicate_steps.length++;
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
.type = TSQueryPredicateStepTypeString,
.value_id = id,

@@ -599,7 +1154,7 @@ static TSQueryError ts_query__parse_predicate(
if (stream->next == ')') {
stream_advance(stream);
stream_skip_whitespace(stream);
array_back(&self->predicates_by_pattern)->length++;
array_back(&self->patterns)->predicate_steps.length++;
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
.type = TSQueryPredicateStepTypeDone,
.value_id = 0,

@@ -628,7 +1183,7 @@ static TSQueryError ts_query__parse_predicate(
return TSQueryErrorCapture;
}

array_back(&self->predicates_by_pattern)->length++;
array_back(&self->patterns)->predicate_steps.length++;
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
.type = TSQueryPredicateStepTypeCapture,
.value_id = capture_id,

@@ -668,7 +1223,7 @@ static TSQueryError ts_query__parse_predicate(
string_content,
length
);
array_back(&self->predicates_by_pattern)->length++;
array_back(&self->patterns)->predicate_steps.length++;
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
.type = TSQueryPredicateStepTypeString,
.value_id = id,

@@ -688,7 +1243,7 @@ static TSQueryError ts_query__parse_predicate(
symbol_start,
length
);
array_back(&self->predicates_by_pattern)->length++;
array_back(&self->patterns)->predicate_steps.length++;
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
.type = TSQueryPredicateStepTypeString,
.value_id = id,

@@ -712,7 +1267,6 @@ static TSQueryError ts_query__parse_pattern(
TSQuery *self,
Stream *stream,
uint32_t depth,
uint32_t *capture_count,
bool is_immediate
) {
const uint32_t starting_step_index = self->steps.size;

@@ -737,7 +1291,6 @@ static TSQueryError ts_query__parse_pattern(
self,
stream,
depth,
capture_count,
is_immediate
);

@@ -790,7 +1343,6 @@ static TSQueryError ts_query__parse_pattern(
self,
stream,
depth,
capture_count,
child_is_immediate
);
if (e == PARENT_DONE && stream->next == ')') {

@@ -871,7 +1423,6 @@ static TSQueryError ts_query__parse_pattern(
self,
stream,
depth + 1,
capture_count,
child_is_immediate
);
if (e == PARENT_DONE && stream->next == ')') {

@@ -955,7 +1506,6 @@ static TSQueryError ts_query__parse_pattern(
self,
stream,
depth,
capture_count,
is_immediate
);
if (e == PARENT_DONE) return TSQueryErrorSyntax;

@@ -1069,8 +1619,6 @@ static TSQueryError ts_query__parse_pattern(
break;
}
}

(*capture_count)++;
}

// No more suffix modifiers

@@ -1123,7 +1671,7 @@ TSQuery *ts_query_new(
.captures = symbol_table_new(),
.predicate_values = symbol_table_new(),
.predicate_steps = array_new(),
.predicates_by_pattern = array_new(),
.patterns = array_new(),
.symbol_map = symbol_map,
.wildcard_root_pattern_count = 0,
.language = language,

@@ -1133,15 +1681,14 @@ TSQuery *ts_query_new(
Stream stream = stream_new(source, source_len);
stream_skip_whitespace(&stream);
while (stream.input < stream.end) {
uint32_t pattern_index = self->predicates_by_pattern.size;
uint32_t pattern_index = self->patterns.size;
uint32_t start_step_index = self->steps.size;
uint32_t capture_count = 0;
array_push(&self->start_bytes_by_pattern, stream.input - source);
array_push(&self->predicates_by_pattern, ((Slice) {
.offset = self->predicate_steps.size,
.length = 0,
array_push(&self->patterns, ((QueryPattern) {
.predicate_steps = (Slice) {.offset = self->predicate_steps.size, .length = 0},
.start_byte = stream.input - source,
.start_step = self->steps.size,
}));
*error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count, false);
*error_type = ts_query__parse_pattern(self, &stream, 0, false);
array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));

// If any pattern could not be parsed, then report the error information

@@ -1183,6 +1730,10 @@ TSQuery *ts_query_new(
}
}

if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) {
ts_query__analyze_patterns(self);
}

ts_query__finalize_steps(self);
return self;
}

@@ -1192,8 +1743,7 @@ void ts_query_delete(TSQuery *self) {
array_delete(&self->steps);
array_delete(&self->pattern_map);
array_delete(&self->predicate_steps);
array_delete(&self->predicates_by_pattern);
array_delete(&self->start_bytes_by_pattern);
array_delete(&self->patterns);
symbol_table_delete(&self->captures);
symbol_table_delete(&self->predicate_values);
ts_free(self->symbol_map);

@@ -1202,7 +1752,7 @@ void ts_query_delete(TSQuery *self) {
}

uint32_t ts_query_pattern_count(const TSQuery *self) {
return self->predicates_by_pattern.size;
return self->patterns.size;
}

uint32_t ts_query_capture_count(const TSQuery *self) {

@@ -1234,7 +1784,7 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
uint32_t pattern_index,
uint32_t *step_count
) {
Slice slice = self->predicates_by_pattern.contents[pattern_index];
Slice slice = self->patterns.contents[pattern_index].predicate_steps;
*step_count = slice.length;
return &self->predicate_steps.contents[slice.offset];
}

@@ -1243,7 +1793,35 @@ uint32_t ts_query_start_byte_for_pattern(
const TSQuery *self,
uint32_t pattern_index
) {
return self->start_bytes_by_pattern.contents[pattern_index];
return self->patterns.contents[pattern_index].start_byte;
}

bool ts_query_pattern_is_definite(
const TSQuery *self,
uint32_t pattern_index,
uint32_t step_count
) {
uint32_t step_index = self->patterns.contents[pattern_index].start_step;
for (;;) {
QueryStep *start_step = &self->steps.contents[step_index];
if (step_index + step_count < self->steps.size) {
QueryStep *step = start_step;
for (unsigned i = 0; i < step_count; i++) {
if (step->depth == PATTERN_DONE_MARKER) {
step = NULL;
break;
}
step++;
}
if (step && !step->is_definite) return false;
}
if (start_step->alternative_index != NONE && start_step->alternative_index > step_index) {
step_index = start_step->alternative_index;
} else {
break;
}
}
return true;
}

void ts_query_disable_capture(