refactor: change symbol_ids to store both string and numeric IDs
- Modified the symbol_ids HashMap to store (String, u16) tuples instead of just a String.
- Updated symbol ID generation to assign numeric IDs sequentially (0 for the end symbol, then 1, 2, 3, ...).
- Changed all symbol_ids access patterns throughout the codebase to go through the tuple (.0 for the string ID, .1 for the numeric ID).
- Updated node_types.json to emit the numeric u16 symbol_id instead of the String.
This commit is contained in:
parent
8238c36f5f
commit
21c9f9ae4f
3 changed files with 63 additions and 48 deletions
|
|
@ -78,7 +78,7 @@ struct GrammarIntrospection {
|
|||
variable_info: Vec<VariableInfo>,
|
||||
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
|
||||
tables: Tables,
|
||||
symbol_ids: HashMap<Symbol, String>,
|
||||
symbol_ids: HashMap<Symbol, (String, u16)>,
|
||||
alias_ids: HashMap<Alias, String>,
|
||||
unique_aliases: Vec<Alias>,
|
||||
}
|
||||
|
|
@ -405,7 +405,7 @@ fn introspect_grammar(
|
|||
optimizations,
|
||||
)?;
|
||||
|
||||
// Generate symbol IDs before rendering C code
|
||||
// Generate symbol IDs (both string and numeric) before rendering C code
|
||||
let (symbol_ids, alias_ids, unique_aliases) = generate_symbol_ids(
|
||||
&tables.parse_table,
|
||||
&syntax_grammar,
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ pub struct NodeInfoJSON {
|
|||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
subtypes: Option<Vec<NodeTypeJSON>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
symbol_id: Option<String>,
|
||||
symbol_id: Option<u16>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
|
|
@ -475,7 +475,7 @@ pub fn generate_node_types_json(
|
|||
lexical_grammar: &LexicalGrammar,
|
||||
default_aliases: &AliasMap,
|
||||
variable_info: &[VariableInfo],
|
||||
symbol_ids: &HashMap<Symbol, String>,
|
||||
symbol_ids: &HashMap<Symbol, (String, u16)>,
|
||||
) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
|
||||
let mut node_types_json = BTreeMap::new();
|
||||
|
||||
|
|
@ -575,7 +575,7 @@ pub fn generate_node_types_json(
|
|||
fields: None,
|
||||
children: None,
|
||||
subtypes: None,
|
||||
symbol_id: symbol_ids.get(&symbol).cloned(),
|
||||
symbol_id: symbol_ids.get(&symbol).map(|t| t.1),
|
||||
});
|
||||
let mut subtypes = info
|
||||
.children
|
||||
|
|
@ -620,7 +620,7 @@ pub fn generate_node_types_json(
|
|||
fields: Some(BTreeMap::new()),
|
||||
children: None,
|
||||
subtypes: None,
|
||||
symbol_id: symbol_ids.get(&symbol).cloned(),
|
||||
symbol_id: symbol_ids.get(&symbol).map(|t| t.1),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -758,7 +758,7 @@ pub fn generate_node_types_json(
|
|||
fields: None,
|
||||
children: None,
|
||||
subtypes: None,
|
||||
symbol_id: symbol_ids.get(&symbol).cloned(),
|
||||
symbol_id: symbol_ids.get(&symbol).map(|t| t.1),
|
||||
});
|
||||
if let Some(children) = &mut node_type_json.children {
|
||||
children.required = false;
|
||||
|
|
@ -777,7 +777,7 @@ pub fn generate_node_types_json(
|
|||
fields: None,
|
||||
children: None,
|
||||
subtypes: None,
|
||||
symbol_id: symbol_ids.get(&symbol).cloned(),
|
||||
symbol_id: symbol_ids.get(&symbol).map(|t| t.1),
|
||||
}),
|
||||
_ => {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,8 +78,7 @@ struct Generator {
|
|||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
default_aliases: AliasMap,
|
||||
symbol_order: HashMap<Symbol, usize>,
|
||||
symbol_ids: HashMap<Symbol, String>,
|
||||
symbol_ids: HashMap<Symbol, (String, u16)>,
|
||||
alias_ids: HashMap<Alias, String>,
|
||||
unique_aliases: Vec<Alias>,
|
||||
symbol_map: HashMap<Symbol, Symbol>,
|
||||
|
|
@ -236,7 +235,7 @@ impl Generator {
|
|||
// Some aliases match an existing symbol in the grammar.
|
||||
let alias_id =
|
||||
if let Some(existing_symbol) = self.symbols_for_alias(alias).first() {
|
||||
self.symbol_ids[&self.symbol_map[existing_symbol]].clone()
|
||||
self.symbol_ids[&self.symbol_map[existing_symbol]].0.clone()
|
||||
}
|
||||
// Other aliases don't match any existing symbol, and need their own
|
||||
// identifiers.
|
||||
|
|
@ -264,7 +263,7 @@ impl Generator {
|
|||
.count()
|
||||
+ 1;
|
||||
let constant_name = if let Some(symbol) = symbol {
|
||||
format!("{}_character_set_{}", self.symbol_ids[symbol], count)
|
||||
format!("{}_character_set_{}", self.symbol_ids[symbol].0, count)
|
||||
} else {
|
||||
format!("extras_character_set_{count}")
|
||||
};
|
||||
|
|
@ -294,7 +293,7 @@ impl Generator {
|
|||
for (supertype, subtypes) in &self.supertype_symbol_map {
|
||||
if let Some(supertype) = self.symbol_ids.get(supertype) {
|
||||
self.supertype_map
|
||||
.entry(supertype.clone())
|
||||
.entry(supertype.0.clone())
|
||||
.or_insert_with(|| subtypes.clone());
|
||||
}
|
||||
}
|
||||
|
|
@ -416,18 +415,19 @@ impl Generator {
|
|||
fn add_symbol_enum(&mut self) {
|
||||
add_line!(self, "enum ts_symbol_identifiers {{");
|
||||
indent!(self);
|
||||
self.symbol_order.insert(Symbol::end(), 0);
|
||||
let mut i = 1;
|
||||
// symbol_ids already contains both string ID and numeric ID
|
||||
for symbol in &self.parse_table.symbols {
|
||||
if *symbol != Symbol::end() {
|
||||
self.symbol_order.insert(*symbol, i);
|
||||
add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
|
||||
i += 1;
|
||||
if *symbol == Symbol::end() {
|
||||
continue;
|
||||
}
|
||||
let (string_id, numeric_id) = &self.symbol_ids[symbol];
|
||||
add_line!(self, "{} = {numeric_id},", string_id);
|
||||
}
|
||||
for alias in &self.unique_aliases {
|
||||
// Add aliases after all symbols
|
||||
let alias_start = self.parse_table.symbols.len();
|
||||
for (idx, alias) in self.unique_aliases.iter().enumerate() {
|
||||
let i = alias_start + idx;
|
||||
add_line!(self, "{} = {i},", self.alias_ids[alias]);
|
||||
i += 1;
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
|
|
@ -445,7 +445,7 @@ impl Generator {
|
|||
alias.value.as_str()
|
||||
}),
|
||||
);
|
||||
add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
|
||||
add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol].0);
|
||||
}
|
||||
for alias in &self.unique_aliases {
|
||||
add_line!(
|
||||
|
|
@ -467,8 +467,8 @@ impl Generator {
|
|||
add_line!(
|
||||
self,
|
||||
"[{}] = {},",
|
||||
self.symbol_ids[symbol],
|
||||
self.symbol_ids[&self.symbol_map[symbol]],
|
||||
self.symbol_ids[symbol].0,
|
||||
self.symbol_ids[&self.symbol_map[symbol]].0,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -516,7 +516,7 @@ impl Generator {
|
|||
);
|
||||
indent!(self);
|
||||
for symbol in &self.parse_table.symbols {
|
||||
add_line!(self, "[{}] = {{", self.symbol_ids[symbol]);
|
||||
add_line!(self, "[{}] = {{", self.symbol_ids[symbol].0);
|
||||
indent!(self);
|
||||
if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
|
||||
add_line!(self, ".visible = true,");
|
||||
|
|
@ -623,8 +623,8 @@ impl Generator {
|
|||
);
|
||||
indent!(self);
|
||||
for (symbol, alias_ids) in alias_ids_by_symbol {
|
||||
let symbol_id = &self.symbol_ids[symbol];
|
||||
let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]];
|
||||
let symbol_id = &self.symbol_ids[symbol].0;
|
||||
let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]].0;
|
||||
add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len());
|
||||
indent!(self);
|
||||
add_line!(self, "{public_symbol_id},");
|
||||
|
|
@ -761,13 +761,15 @@ impl Generator {
|
|||
subtypes
|
||||
.iter()
|
||||
.flat_map(|s| match s {
|
||||
ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()],
|
||||
ChildType::Normal(symbol) => {
|
||||
vec![self.symbol_ids.get(symbol).map(|t| t.0.clone())]
|
||||
}
|
||||
ChildType::Aliased(alias) => {
|
||||
self.alias_ids.get(alias).cloned().map_or_else(
|
||||
|| {
|
||||
self.symbols_for_alias(alias)
|
||||
.into_iter()
|
||||
.map(|s| self.symbol_ids.get(&s).cloned())
|
||||
.map(|s| self.symbol_ids.get(&s).map(|t| t.0.clone()))
|
||||
.collect()
|
||||
},
|
||||
|a| vec![Some(a)],
|
||||
|
|
@ -846,7 +848,7 @@ impl Generator {
|
|||
|
||||
fn add_lex_state(&mut self, _state_ix: usize, state: LexState) {
|
||||
if let Some(accept_action) = state.accept_action {
|
||||
add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
|
||||
add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action].0);
|
||||
}
|
||||
|
||||
if let Some(eof_action) = state.eof_action {
|
||||
|
|
@ -1190,7 +1192,7 @@ impl Generator {
|
|||
add_line!(self, "[{id}] = {{");
|
||||
indent!(self);
|
||||
for token in set.iter() {
|
||||
add_line!(self, "{},", self.symbol_ids[&token]);
|
||||
add_line!(self, "{},", self.symbol_ids[&token].0);
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}},");
|
||||
|
|
@ -1230,7 +1232,7 @@ impl Generator {
|
|||
self,
|
||||
"[{}] = {},",
|
||||
self.external_token_id(token),
|
||||
self.symbol_ids[&id_token],
|
||||
self.symbol_ids[&id_token].0,
|
||||
);
|
||||
}
|
||||
dedent!(self);
|
||||
|
|
@ -1304,14 +1306,14 @@ impl Generator {
|
|||
nonterminal_entries.clear();
|
||||
terminal_entries.extend(state.terminal_entries.iter());
|
||||
nonterminal_entries.extend(state.nonterminal_entries.iter());
|
||||
terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
|
||||
terminal_entries.sort_unstable_by_key(|e| self.symbol_ids.get(e.0).map(|t| &t.1));
|
||||
nonterminal_entries.sort_unstable_by_key(|k| k.0);
|
||||
|
||||
for (symbol, action) in &nonterminal_entries {
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = STATE({}),",
|
||||
self.symbol_ids[symbol],
|
||||
self.symbol_ids[symbol].0,
|
||||
match action {
|
||||
GotoAction::Goto(state) => *state,
|
||||
GotoAction::ShiftExtra => i,
|
||||
|
|
@ -1325,7 +1327,11 @@ impl Generator {
|
|||
&mut parse_table_entries,
|
||||
&mut next_parse_action_list_index,
|
||||
);
|
||||
add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = ACTIONS({entry_id}),",
|
||||
self.symbol_ids[symbol].0
|
||||
);
|
||||
}
|
||||
|
||||
dedent!(self);
|
||||
|
|
@ -1354,7 +1360,7 @@ impl Generator {
|
|||
|
||||
terminal_entries.clear();
|
||||
terminal_entries.extend(state.terminal_entries.iter());
|
||||
terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
|
||||
terminal_entries.sort_unstable_by_key(|e| self.symbol_ids.get(e.0).map(|t| &t.1));
|
||||
|
||||
// In a given parse state, many lookahead symbols have the same actions.
|
||||
// So in the "small state" representation, group symbols by their action
|
||||
|
|
@ -1407,7 +1413,7 @@ impl Generator {
|
|||
symbols.sort_unstable();
|
||||
indent!(self);
|
||||
for symbol in symbols {
|
||||
add_line!(self, "{},", self.symbol_ids[symbol]);
|
||||
add_line!(self, "{},", self.symbol_ids[symbol].0);
|
||||
}
|
||||
dedent!(self);
|
||||
}
|
||||
|
|
@ -1483,7 +1489,7 @@ impl Generator {
|
|||
add!(
|
||||
self,
|
||||
"REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
|
||||
self.symbol_ids[&symbol]
|
||||
self.symbol_ids[&symbol].0
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1595,7 +1601,7 @@ impl Generator {
|
|||
add_line!(
|
||||
self,
|
||||
".keyword_capture_token = {},",
|
||||
self.symbol_ids[&keyword_capture_token]
|
||||
self.symbol_ids[&keyword_capture_token].0
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1898,8 +1904,9 @@ fn assign_symbol_id(
|
|||
symbol: Symbol,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
symbol_ids: &mut HashMap<Symbol, String>,
|
||||
symbol_ids: &mut HashMap<Symbol, (String, u16)>,
|
||||
used_identifiers: &mut HashSet<String>,
|
||||
numeric_id: u16,
|
||||
) {
|
||||
let mut id;
|
||||
if symbol == Symbol::end() {
|
||||
|
|
@ -1925,7 +1932,7 @@ fn assign_symbol_id(
|
|||
}
|
||||
|
||||
used_identifiers.insert(id.clone());
|
||||
symbol_ids.insert(symbol, id);
|
||||
symbol_ids.insert(symbol, (id, numeric_id));
|
||||
}
|
||||
|
||||
/// Generates symbol IDs and alias IDs for the given parse table and grammars.
|
||||
|
|
@ -1943,7 +1950,7 @@ fn assign_symbol_id(
|
|||
/// # Returns
|
||||
///
|
||||
/// A tuple containing:
|
||||
/// * `symbol_ids` - HashMap mapping each Symbol to its C identifier string
|
||||
/// * `symbol_ids` - HashMap mapping each Symbol to (C identifier string, numeric ID)
|
||||
/// * `alias_ids` - HashMap mapping each Alias to its C identifier string
|
||||
/// * `unique_aliases` - Sorted vector of unique aliases
|
||||
pub fn generate_symbol_ids(
|
||||
|
|
@ -1951,21 +1958,29 @@ pub fn generate_symbol_ids(
|
|||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
default_aliases: &AliasMap,
|
||||
) -> (HashMap<Symbol, String>, HashMap<Alias, String>, Vec<Alias>) {
|
||||
) -> (
|
||||
HashMap<Symbol, (String, u16)>,
|
||||
HashMap<Alias, String>,
|
||||
Vec<Alias>,
|
||||
) {
|
||||
let mut symbol_ids = HashMap::new();
|
||||
let mut alias_ids = HashMap::new();
|
||||
let mut unique_aliases = Vec::new();
|
||||
let mut symbol_identifiers = HashSet::new();
|
||||
|
||||
// Generate symbol IDs
|
||||
for i in 0..parse_table.symbols.len() {
|
||||
// Generate symbol IDs with numeric IDs
|
||||
// Symbol::end() gets 0, then other symbols get 1, 2, 3...
|
||||
let mut numeric_id = 0u16;
|
||||
for &symbol in &parse_table.symbols {
|
||||
assign_symbol_id(
|
||||
parse_table.symbols[i],
|
||||
symbol,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&mut symbol_ids,
|
||||
&mut symbol_identifiers,
|
||||
numeric_id,
|
||||
);
|
||||
numeric_id += 1;
|
||||
}
|
||||
|
||||
symbol_ids.insert(
|
||||
|
|
@ -2037,7 +2052,7 @@ pub fn generate_symbol_ids(
|
|||
|
||||
// Some aliases match an existing symbol in the grammar.
|
||||
let alias_id = if let Some(existing_symbol) = matching_symbols.first() {
|
||||
symbol_ids[&symbol_map[existing_symbol]].clone()
|
||||
symbol_ids[&symbol_map[existing_symbol]].0.clone()
|
||||
}
|
||||
// Other aliases don't match any existing symbol, and need their own identifiers.
|
||||
else {
|
||||
|
|
@ -2086,7 +2101,7 @@ pub fn render_c_code(
|
|||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
default_aliases: AliasMap,
|
||||
symbol_ids: HashMap<Symbol, String>,
|
||||
symbol_ids: HashMap<Symbol, (String, u16)>,
|
||||
alias_ids: HashMap<Alias, String>,
|
||||
unique_aliases: Vec<Alias>,
|
||||
abi_version: usize,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue