diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 1ce3ce55..cec02b50 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -11,11 +11,8 @@ use std::collections::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; -// Currently, the library supports a new ABI version that has not yet been -// stabilized, and the parser generation does not use it by default. -const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1; - const LARGE_CHARACTER_RANGE_COUNT: usize = 8; +const SMALL_STATE_THRESHOLD: usize = 64; macro_rules! add { ($this: tt, $($arg: tt)*) => {{ @@ -52,8 +49,6 @@ macro_rules! dedent { }; } -const SMALL_STATE_THRESHOLD: usize = 64; - struct Generator { buffer: String, indent_level: usize, @@ -72,6 +67,8 @@ struct Generator { unique_aliases: Vec, symbol_map: HashMap, field_names: Vec, + + #[allow(unused)] next_abi: bool, } @@ -109,9 +106,7 @@ impl Generator { self.add_alias_sequences(); } - if self.next_abi { - self.add_non_terminal_alias_map(); - } + self.add_non_terminal_alias_map(); let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); @@ -296,15 +291,11 @@ impl Generator { }) .count(); - if self.next_abi { - add_line!( - self, - "#define LANGUAGE_VERSION {}", - tree_sitter::LANGUAGE_VERSION - ); - } else { - add_line!(self, "#define LANGUAGE_VERSION {}", STABLE_LANGUAGE_VERSION); - } + add_line!( + self, + "#define LANGUAGE_VERSION {}", + tree_sitter::LANGUAGE_VERSION + ); add_line!( self, @@ -331,6 +322,11 @@ impl Generator { "#define MAX_ALIAS_SEQUENCE_LENGTH {}", self.parse_table.max_aliased_production_length ); + add_line!( + self, + "#define PRODUCTION_ID_COUNT {}", + self.parse_table.production_infos.len() + ); add_line!(self, ""); } @@ -488,8 +484,7 @@ impl Generator { fn add_alias_sequences(&mut self) { add_line!( self, - "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", - self.parse_table.production_infos.len() + "static TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{", ); indent!(self); for (i, production_info) in self.parse_table.production_infos.iter().enumerate() { @@ -597,8 +592,7 @@ impl Generator { add_line!( self, - "static const TSFieldMapSlice ts_field_map_slices[{}] = {{", - self.parse_table.production_infos.len(), + "static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{", ); indent!(self); for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { @@ -1394,11 +1388,9 @@ impl Generator { } add_line!(self, ".public_symbol_map = ts_symbol_map,"); - - if self.next_abi { - add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); - add_line!(self, ".state_count = STATE_COUNT,"); - } + add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); + add_line!(self, ".state_count = STATE_COUNT,"); + add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,"); dedent!(self); add_line!(self, "}};"); diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index f28d3461..154ef826 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -208,8 +208,8 @@ extern "C" { #[doc = " following three fields:"] #[doc = " 1. `read`: A function to retrieve a chunk of text at a given byte offset"] #[doc = " and (row, column) position. The function should return a pointer to the"] - #[doc = " text and write its length to the the `bytes_read` pointer. The parser"] - #[doc = " does not take ownership of this buffer; it just borrows it until it has"] + #[doc = " text and write its length to the `bytes_read` pointer. The parser does"] + #[doc = " not take ownership of this buffer; it just borrows it until it has"] #[doc = " finished reading it. The function should write a zero value to the"] #[doc = " `bytes_read` pointer to indicate the end of the document."] #[doc = " 2. `payload`: An arbitrary pointer that will be passed to each invocation"] @@ -697,7 +697,7 @@ extern "C" { #[doc = " to start running a given query on a given syntax node. Then, there are"] #[doc = " two options for consuming the results of the query:"] #[doc = " 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"] - #[doc = " the *matches* in the order that they were found. Each match contains the"] + #[doc = " *matches* in the order that they were found. Each match contains the"] #[doc = " index of the pattern that matched, and an array of captures. Because"] #[doc = " multiple patterns can match the same set of nodes, one match may contain"] #[doc = " captures that appear *before* some of the captures from a previous match."] @@ -804,5 +804,5 @@ extern "C" { pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 12; -pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 13; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index caa05f52..3299fd20 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -21,13 +21,13 @@ extern "C" { * The Tree-sitter library is generally backwards-compatible with languages * generated using older CLI versions, but is not forwards-compatible. */ -#define TREE_SITTER_LANGUAGE_VERSION 12 +#define TREE_SITTER_LANGUAGE_VERSION 13 /** * The earliest ABI version that is supported by the current version of the * library. */ -#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9 +#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 /*******************/ /* Section - Types */ diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index c5a788ff..05e61371 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -13,6 +13,8 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 +typedef uint16_t TSStateId; + #ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; @@ -30,12 +32,10 @@ typedef struct { uint16_t length; } TSFieldMapSlice; -typedef uint16_t TSStateId; - typedef struct { - bool visible : 1; - bool named : 1; - bool supertype: 1; + bool visible; + bool named; + bool supertype; } TSSymbolMetadata; typedef struct TSLexer TSLexer; @@ -57,21 +57,21 @@ typedef enum { TSParseActionTypeRecover, } TSParseActionType; -typedef struct { - union { - struct { - TSStateId state; - bool extra : 1; - bool repetition : 1; - } shift; - struct { - TSSymbol symbol; - int16_t dynamic_precedence; - uint8_t child_count; - uint8_t production_id; - } reduce; - } params; - TSParseActionType type : 4; +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; } TSParseAction; typedef struct { @@ -83,7 +83,7 @@ typedef union { TSParseAction action; struct { uint8_t count; - bool reusable : 1; + bool reusable; } entry; } TSParseActionEntry; @@ -122,6 +122,7 @@ struct TSLanguage { const TSSymbol *public_symbol_map; const uint16_t *alias_map; uint32_t state_count; + uint32_t production_id_count; }; /* @@ -170,66 +171,50 @@ struct TSLanguage { #define ACTIONS(id) id -#define SHIFT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value \ - } \ - }, \ - .type = TSParseActionTypeShift \ - } \ - } +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} #define SHIFT_REPEAT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value, \ - .repetition = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ } \ - } - -#define RECOVER() \ - { \ - { .type = TSParseActionTypeRecover } \ - } + }} #define SHIFT_EXTRA() \ - { \ - { \ - .params = { \ - .shift = { \ - .extra = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ } \ - } + }} #define REDUCE(symbol_val, child_count_val, ...) \ - { \ - { \ - .params = { \ - .reduce = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ - }, \ - .type = TSParseActionTypeReduce \ - } \ - } + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} -#define ACCEPT_INPUT() \ - { \ - { .type = TSParseActionTypeAccept } \ - } +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} #ifdef __cplusplus } diff --git a/lib/src/language.c b/lib/src/language.c index 9ccf2bc3..5fbb3180 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -12,11 +12,7 @@ uint32_t ts_language_version(const TSLanguage *self) { } uint32_t ts_language_field_count(const TSLanguage *self) { - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) { - return self->field_count; - } else { - return 0; - } + return self->field_count; } void ts_language_table_entry( @@ -57,11 +53,7 @@ TSSymbol ts_language_public_symbol( TSSymbol symbol ) { if (symbol == ts_builtin_sym_error) return symbol; - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - return self->public_symbol_map[symbol]; - } else { - return symbol; - } + return self->public_symbol_map[symbol]; } const char *ts_language_symbol_name( @@ -92,11 +84,7 @@ TSSymbol ts_language_symbol_for_name( if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; const char *symbol_name = self->symbol_names[i]; if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - return self->public_symbol_map[i]; - } else { - return i; - } + return self->public_symbol_map[i]; } } return 0; diff --git a/lib/src/language.h b/lib/src/language.h index e5c07aa2..47f63672 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -9,11 +9,6 @@ extern "C" { #include "tree_sitter/parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) -#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT 12 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP 12 typedef struct { const TSParseAction *actions; @@ -91,10 +86,7 @@ static inline uint16_t ts_language_lookup( TSStateId state, TSSymbol symbol ) { - if ( - self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && - state >= self->large_state_count - ) { + if (state >= self->large_state_count) { uint32_t index = self->small_parse_table_map[state - self->large_state_count]; const uint16_t *data = &self->small_parse_table[index]; uint16_t group_count = *(data++); @@ -121,9 +113,7 @@ static inline LookaheadIterator ts_language_lookaheads( const TSLanguage *self, TSStateId state ) { - bool is_small_state = - self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && - state >= self->large_state_count; + bool is_small_state = state >= self->large_state_count; const uint16_t *data; const uint16_t *group_end = NULL; uint16_t group_count = 0; @@ -203,7 +193,7 @@ static inline TSStateId ts_language_next_state( if (count > 0) { TSParseAction action = actions[count - 1]; if (action.type == TSParseActionTypeShift) { - return action.params.shift.extra ? state : action.params.shift.state; + return action.shift.extra ? state : action.shift.state; } } return 0; @@ -248,7 +238,7 @@ static inline void ts_language_field_map( const TSFieldMapEntry **start, const TSFieldMapEntry **end ) { - if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS || self->field_count == 0) { + if (self->field_count == 0) { *start = NULL; *end = NULL; return; @@ -268,8 +258,6 @@ static inline void ts_language_aliases_for_symbol( *start = &self->public_symbol_map[original_symbol]; *end = *start + 1; - if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP) return; - unsigned i = 0; for (;;) { TSSymbol symbol = self->alias_map[i++]; diff --git a/lib/src/parser.c b/lib/src/parser.c index e9d87ac9..35069f63 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1012,15 +1012,15 @@ static bool ts_parser__do_all_potential_reductions( switch (action.type) { case TSParseActionTypeShift: case TSParseActionTypeRecover: - if (!action.params.shift.extra && !action.params.shift.repetition) has_shift_action = true; + if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; break; case TSParseActionTypeReduce: - if (action.params.reduce.child_count > 0) + if (action.reduce.child_count > 0) ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ - .symbol = action.params.reduce.symbol, - .count = action.params.reduce.child_count, - .dynamic_precedence = action.params.reduce.dynamic_precedence, - .production_id = action.params.reduce.production_id, + .symbol = action.reduce.symbol, + .count = action.reduce.child_count, + .dynamic_precedence = action.reduce.dynamic_precedence, + .production_id = action.reduce.production_id, }); break; default: @@ -1311,7 +1311,7 @@ static void ts_parser__recover( // be counted in error cost calculations. unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.shift.extra) { + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); ts_subtree_set_extra(&mutable_lookahead); lookahead = ts_subtree_from_mut(mutable_lookahead); @@ -1441,17 +1441,13 @@ static bool ts_parser__advance( switch (action.type) { case TSParseActionTypeShift: { - if (action.params.shift.repetition) break; + if (action.shift.repetition) break; TSStateId next_state; - if (action.params.shift.extra) { - - // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out. - if (state == ERROR_STATE) continue; - + if (action.shift.extra) { next_state = state; LOG("shift_extra"); } else { - next_state = action.params.shift.state; + next_state = action.shift.state; LOG("shift state:%u", next_state); } @@ -1460,7 +1456,7 @@ static bool ts_parser__advance( next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); } - ts_parser__shift(self, version, next_state, lookahead, action.params.shift.extra); + ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); return true; } @@ -1468,10 +1464,10 @@ static bool ts_parser__advance( case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; bool end_of_non_terminal_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); StackVersion reduction_version = ts_parser__reduce( - self, version, action.params.reduce.symbol, action.params.reduce.child_count, - action.params.reduce.dynamic_precedence, action.params.reduce.production_id, + self, version, action.reduce.symbol, action.reduce.child_count, + action.reduce.dynamic_precedence, action.reduce.production_id, is_fragile, end_of_non_terminal_extra ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/lib/src/query.c b/lib/src/query.c index bf0598ce..247880c7 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -217,7 +217,6 @@ struct TSQuery { Array(char) string_buffer; const TSLanguage *language; uint16_t wildcard_root_pattern_count; - TSSymbol *symbol_map; }; /* @@ -755,7 +754,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { const TSSymbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, - action->params.reduce.symbol, + action->reduce.symbol, &aliases, &aliases_end ); @@ -772,15 +771,15 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, + .production_id = action->reduce.production_id, + .child_index = action->reduce.child_count, .done = true, })); } } } - } else if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - TSStateId next_state = action->params.shift.state; + } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { + TSStateId next_state = action->shift.state; state_predecessor_map_add(&predecessor_map, next_state, state); } } @@ -1019,8 +1018,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { TSStateId next_parse_state; if (lookahead_iterator.action_count) { const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - next_parse_state = action->params.shift.state; + if (action->type == TSParseActionTypeShift && !action->shift.extra) { + next_parse_state = action->shift.state; } else { continue; } @@ -1896,33 +1895,6 @@ TSQuery *ts_query_new( uint32_t *error_offset, TSQueryError *error_type ) { - TSSymbol *symbol_map; - if (ts_language_version(language) >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - symbol_map = NULL; - } else { - // Work around the fact that multiple symbols can currently be - // associated with the same name, due to "simple aliases". - // In the next language ABI version, this map will be contained - // in the language's `public_symbol_map` field. - uint32_t symbol_count = ts_language_symbol_count(language); - symbol_map = ts_malloc(sizeof(TSSymbol) * symbol_count); - for (unsigned i = 0; i < symbol_count; i++) { - const char *name = ts_language_symbol_name(language, i); - const TSSymbolType symbol_type = ts_language_symbol_type(language, i); - - symbol_map[i] = i; - - for (unsigned j = 0; j < i; j++) { - if (ts_language_symbol_type(language, j) == symbol_type) { - if (!strcmp(name, ts_language_symbol_name(language, j))) { - symbol_map[i] = j; - break; - } - } - } - } - } - TSQuery *self = ts_malloc(sizeof(TSQuery)); *self = (TSQuery) { .steps = array_new(), @@ -1933,7 +1905,6 @@ TSQuery *ts_query_new( .patterns = array_new(), .step_offsets = array_new(), .string_buffer = array_new(), - .symbol_map = symbol_map, .wildcard_root_pattern_count = 0, .language = language, }; @@ -2003,12 +1974,10 @@ TSQuery *ts_query_new( } } - if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { - if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorStructure; - ts_query_delete(self); - return NULL; - } + if (!ts_query__analyze_patterns(self, error_offset)) { + *error_type = TSQueryErrorStructure; + ts_query_delete(self); + return NULL; } ts_query__finalize_steps(self); @@ -2026,7 +1995,6 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->string_buffer); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); - ts_free(self->symbol_map); ts_free(self); } } @@ -2585,9 +2553,6 @@ static inline bool ts_query_cursor__advance( // Get the properties of the current node. TSSymbol symbol = ts_node_symbol(node); bool is_named = ts_node_is_named(node); - if (symbol != ts_builtin_sym_error && self->query->symbol_map) { - symbol = self->query->symbol_map[symbol]; - } bool has_later_siblings; bool has_later_named_siblings; bool can_have_later_siblings_with_this_field;