// Review notes for this patch, placed ahead of the first `diff --git` header.
//
// Purpose: record a "primary" state id for every parse state and use it in query analysis so that
// only primary states are added as subgraph start states.
//
// * cli/src/generate/render.rs
//     - New `Generator::add_primary_state_id_list` writes a C array
//       `ts_primary_state_ids[STATE_COUNT]`. For each state index `idx` it looks up `state.core_id`
//       in a `HashMap` with `entry(..).or_insert(idx)`, so entry `idx` holds the index of the first
//       state seen with that `core_id` (a state that is first for its `core_id` maps to itself).
//     - The new method is called right after `add_non_terminal_alias_map()`, and the array is
//       wired into the generated language struct via `.ts_primary_state_ids = ts_primary_state_ids`.
// * lib/binding_rust/bindings.rs, lib/include/tree_sitter/api.h
//     - `TREE_SITTER_LANGUAGE_VERSION` changes from 13 to 14. The
//       `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` line visible in bindings.rs is context only
//       and stays at 13.
// * lib/include/tree_sitter/parser.h
//     - Adds `const TSStateId *ts_primary_state_ids;` to `struct TSLanguage`, between
//       `alias_sequences` and `lex_modes`.
// * lib/src/language.h
//     - New `ts_language_state_is_primary(self, state)` returns
//       `state == self->ts_primary_state_ids[state]` when `self->version >= 14`, otherwise `true`.
// * lib/src/query.c (`ts_query__analyze_patterns`)
//     - The alias lookup and the `start_states` push are re-indented and wrapped in
//       `if (ts_language_state_is_primary(self->language, state))`. The
//       `state_predecessor_map_add` call shown as context stays outside the new check.
//
// NOTE(review): the new struct member is inserted ahead of `lex_modes` instead of being appended,
// while the minimum compatible version shown in bindings.rs remains 13. If version-13 languages
// were compiled against the previous `struct TSLanguage` layout, every member after
// `alias_sequences` would be read at a shifted offset -- confirm whether the field should go at
// the end of the struct (its tail is not visible in this hunk) or the minimum version be raised.
// NOTE(review): `14` in `ts_language_state_is_primary` is a bare literal; a named constant may be
// preferable -- TODO confirm against the conventions of language.h.
diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 613776bf..6217cce8 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -106,6 +106,7 @@ impl Generator { } self.add_non_terminal_alias_map(); + self.add_primary_state_id_list(); let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); @@ -565,6 +566,29 @@ impl Generator { add_line!(self, ""); } + /// Produces a list of the "primary state" for every state in the grammar. + /// + /// The "primary state" for a given state is the first encountered state that behaves + /// identically with respect to query analysis. We derive this by keeping track of the `core_id` + /// for each state and treating the first state with a given `core_id` as primary. + fn add_primary_state_id_list(&mut self) { + add_line!( + self, + "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{" + ); + indent!(self); + let mut first_state_for_each_core_id = HashMap::new(); + for (idx, state) in self.parse_table.states.iter().enumerate() { + let primary_state = first_state_for_each_core_id + .entry(state.core_id) + .or_insert(idx); + add_line!(self, "[{}] = {},", idx, primary_state); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; @@ -1369,6 +1393,7 @@ impl Generator { if !self.parse_table.production_infos.is_empty() { add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],"); } + add_line!(self, ".ts_primary_state_ids = ts_primary_state_ids,"); // Lexing add_line!(self, ".lex_modes = ts_lex_modes,"); diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 5bcbac42..c7fcf5fb 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -855,5 +855,5 @@ extern "C" { ); } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 13; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 
14; pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 33b8c8f4..4ddbe0d9 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -21,7 +21,7 @@ extern "C" { * The Tree-sitter library is generally backwards-compatible with languages * generated using older CLI versions, but is not forwards-compatible. */ -#define TREE_SITTER_LANGUAGE_VERSION 13 +#define TREE_SITTER_LANGUAGE_VERSION 14 /** * The earliest ABI version that is supported by the current version of the diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index cbbc7b4e..66379d3b 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -110,6 +110,7 @@ struct TSLanguage { const TSSymbol *public_symbol_map; const uint16_t *alias_map; const TSSymbol *alias_sequences; + const TSStateId *ts_primary_state_ids; const TSLexMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); diff --git a/lib/src/language.h b/lib/src/language.h index 472eaea1..4af4592a 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -200,6 +200,19 @@ static inline TSStateId ts_language_next_state( } } +// Whether the state is a "primary state". If this returns false, it indicates that there exists +// another state that behaves identically to this one with respect to query analysis. 
+static inline bool ts_language_state_is_primary( + const TSLanguage *self, + TSStateId state +) { + if (self->version >= 14) { + return state == self->ts_primary_state_ids[state]; + } else { + return true; + } +} + static inline const bool *ts_language_enabled_external_tokens( const TSLanguage *self, unsigned external_scanner_state diff --git a/lib/src/query.c b/lib/src/query.c index aba9a0f3..758c4954 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -960,28 +960,30 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (lookahead_iterator.next_state != state) { state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); } - const TSSymbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - lookahead_iterator.symbol, - &aliases, - &aliases_end - ); - for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - *symbol, - &subgraph_index, - &exists + if (ts_language_state_is_primary(self->language, state)) { + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + lookahead_iterator.symbol, + &aliases, + &aliases_end ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if ( - subgraph->start_states.size == 0 || - *array_back(&subgraph->start_states) != state - ) - array_push(&subgraph->start_states, state); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + .symbol, + *symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if ( + subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state + ) + array_push(&subgraph->start_states, state); + } } } }