Merge pull request #500 from tree-sitter/dedup-symbols
Store a mapping to ensure no two symbols map to the same metadata
This commit is contained in:
commit
1c8aed790d
7 changed files with 128 additions and 33 deletions
|
|
@ -35,7 +35,8 @@ const NEW_HEADER_PARTS: [&'static str; 2] = [
|
|||
"
|
||||
uint32_t large_state_count;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;",
|
||||
const uint32_t *small_parse_table_map;
|
||||
const TSSymbol *public_symbol_map;",
|
||||
"
|
||||
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
|
||||
",
|
||||
|
|
|
|||
|
|
@ -80,6 +80,11 @@ impl Generator {
|
|||
self.add_stats();
|
||||
self.add_symbol_enum();
|
||||
self.add_symbol_names_list();
|
||||
|
||||
if self.next_abi {
|
||||
self.add_unique_symbol_map();
|
||||
}
|
||||
|
||||
self.add_symbol_metadata_list();
|
||||
|
||||
if !self.field_names.is_empty() {
|
||||
|
|
@ -320,6 +325,51 @@ impl Generator {
|
|||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_unique_symbol_map(&mut self) {
|
||||
add_line!(self, "static TSSymbol ts_symbol_map[] = {{");
|
||||
indent!(self);
|
||||
for symbol in &self.parse_table.symbols {
|
||||
let mut mapping = symbol;
|
||||
|
||||
// If this symbol has a simple alias, then check if its alias has the same
|
||||
// name and kind (e.g. named vs anonymous) as some other symbol in the grammar.
|
||||
// If so, add an entry to the symbol map that deduplicates these two symbols,
|
||||
// so that only one of them will ever be returned via the public API.
|
||||
if let Some(alias) = self.simple_aliases.get(symbol) {
|
||||
let kind = if alias.is_named {
|
||||
VariableType::Named
|
||||
} else {
|
||||
VariableType::Anonymous
|
||||
};
|
||||
|
||||
for other_symbol in &self.parse_table.symbols {
|
||||
if other_symbol == symbol {
|
||||
continue;
|
||||
}
|
||||
if let Some(other_alias) = self.simple_aliases.get(other_symbol) {
|
||||
if other_symbol < symbol && other_alias == alias {
|
||||
mapping = other_symbol;
|
||||
break;
|
||||
}
|
||||
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
|
||||
mapping = other_symbol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = {},",
|
||||
self.symbol_ids[&symbol],
|
||||
self.symbol_ids[&mapping],
|
||||
);
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_field_name_enum(&mut self) {
|
||||
add_line!(self, "enum {{");
|
||||
indent!(self);
|
||||
|
|
@ -1072,6 +1122,10 @@ impl Generator {
|
|||
add_line!(self, ".lex_modes = ts_lex_modes,");
|
||||
add_line!(self, ".symbol_names = ts_symbol_names,");
|
||||
|
||||
if self.next_abi {
|
||||
add_line!(self, ".public_symbol_map = ts_symbol_map,");
|
||||
}
|
||||
|
||||
if !self.parse_table.production_infos.is_empty() {
|
||||
add_line!(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -118,6 +118,7 @@ struct TSLanguage {
|
|||
uint32_t large_state_count;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;
|
||||
const TSSymbol *public_symbol_map;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -3,8 +3,28 @@
|
|||
#include "./error_costs.h"
|
||||
#include <string.h>
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol, TableEntry *result) {
|
||||
uint32_t ts_language_symbol_count(const TSLanguage *self) {
|
||||
return self->symbol_count + self->alias_count;
|
||||
}
|
||||
|
||||
uint32_t ts_language_version(const TSLanguage *self) {
|
||||
return self->version;
|
||||
}
|
||||
|
||||
uint32_t ts_language_field_count(const TSLanguage *self) {
|
||||
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) {
|
||||
return self->field_count;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void ts_language_table_entry(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol,
|
||||
TableEntry *result
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
||||
result->action_count = 0;
|
||||
result->is_reusable = false;
|
||||
|
|
@ -19,31 +39,41 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t ts_language_symbol_count(const TSLanguage *language) {
|
||||
return language->symbol_count + language->alias_count;
|
||||
}
|
||||
|
||||
uint32_t ts_language_version(const TSLanguage *language) {
|
||||
return language->version;
|
||||
}
|
||||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) {
|
||||
TSSymbolMetadata ts_language_symbol_metadata(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error) {
|
||||
return (TSSymbolMetadata){.visible = true, .named = true};
|
||||
} else if (symbol == ts_builtin_sym_error_repeat) {
|
||||
return (TSSymbolMetadata){.visible = false, .named = false};
|
||||
} else {
|
||||
return language->symbol_metadata[symbol];
|
||||
return self->symbol_metadata[symbol];
|
||||
}
|
||||
}
|
||||
|
||||
const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
|
||||
TSSymbol ts_language_public_symbol(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error) return symbol;
|
||||
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
|
||||
return self->public_symbol_map[symbol];
|
||||
} else {
|
||||
return symbol;
|
||||
}
|
||||
}
|
||||
|
||||
const char *ts_language_symbol_name(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error) {
|
||||
return "ERROR";
|
||||
} else if (symbol == ts_builtin_sym_error_repeat) {
|
||||
return "_ERROR";
|
||||
} else {
|
||||
return language->symbol_names[symbol];
|
||||
return self->symbol_names[symbol];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -59,13 +89,22 @@ TSSymbol ts_language_symbol_for_name(
|
|||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
|
||||
if (!metadata.visible || metadata.named != is_named) continue;
|
||||
const char *symbol_name = self->symbol_names[i];
|
||||
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) return i;
|
||||
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
|
||||
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
|
||||
return self->public_symbol_map[i];
|
||||
} else {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
|
||||
TSSymbolType ts_language_symbol_type(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
|
||||
if (metadata.named) {
|
||||
return TSSymbolTypeRegular;
|
||||
} else if (metadata.visible) {
|
||||
|
|
@ -75,15 +114,10 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t ts_language_field_count(const TSLanguage *self) {
|
||||
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) {
|
||||
return self->field_count;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) {
|
||||
const char *ts_language_field_name_for_id(
|
||||
const TSLanguage *self,
|
||||
TSFieldId id
|
||||
) {
|
||||
uint32_t count = ts_language_field_count(self);
|
||||
if (count) {
|
||||
return self->field_names[id];
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ extern "C" {
|
|||
|
||||
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
|
||||
#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
|
||||
#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11
|
||||
#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -22,6 +23,8 @@ void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry
|
|||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
|
||||
|
||||
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
|
||||
return 0 < symbol && symbol < self->external_token_count + 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -415,13 +415,15 @@ TSPoint ts_node_end_point(TSNode self) {
|
|||
}
|
||||
|
||||
TSSymbol ts_node_symbol(TSNode self) {
|
||||
return ts_node__alias(&self)
|
||||
? ts_node__alias(&self)
|
||||
: ts_subtree_symbol(ts_node__subtree(self));
|
||||
TSSymbol symbol = ts_node__alias(&self);
|
||||
if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self));
|
||||
return ts_language_public_symbol(self.tree->language, symbol);
|
||||
}
|
||||
|
||||
const char *ts_node_type(TSNode self) {
|
||||
return ts_language_symbol_name(self.tree->language, ts_node_symbol(self));
|
||||
TSSymbol symbol = ts_node__alias(&self);
|
||||
if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self));
|
||||
return ts_language_symbol_name(self.tree->language, symbol);
|
||||
}
|
||||
|
||||
char *ts_node_string(TSNode self) {
|
||||
|
|
|
|||
|
|
@ -742,8 +742,8 @@ TSQuery *ts_query_new(
|
|||
) {
|
||||
// Work around the fact that multiple symbols can currently be
|
||||
// associated with the same name, due to "simple aliases".
|
||||
// In the next language ABI version, this map should be contained
|
||||
// within the language itself.
|
||||
// In the next language ABI version, this map will be contained
|
||||
// in the language's `public_symbol_map` field.
|
||||
uint32_t symbol_count = ts_language_symbol_count(language);
|
||||
TSSymbol *symbol_map = ts_malloc(sizeof(TSSymbol) * symbol_count);
|
||||
for (unsigned i = 0; i < symbol_count; i++) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue