refactor: extract symbol ID generation into dedicated module
This commit is contained in:
parent
e029188319
commit
0ad40ec263
2 changed files with 310 additions and 311 deletions
|
|
@ -1,12 +1,15 @@
|
|||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
fmt::Write,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
build_tables::{build_tables, Tables},
|
||||
grammars::{InputGrammar, LexicalGrammar, SyntaxGrammar},
|
||||
grammars::{InputGrammar, LexicalGrammar, SyntaxGrammar, VariableType},
|
||||
node_types::{self, ChildType, VariableInfo},
|
||||
prepare_grammar::prepare_grammar,
|
||||
render::generate_symbol_ids,
|
||||
rules::{Alias, Symbol},
|
||||
rules::{Alias, AliasMap, Symbol, SymbolType},
|
||||
tables::ParseTable,
|
||||
GenerateError, OptLevel,
|
||||
};
|
||||
|
||||
|
|
@ -64,3 +67,298 @@ pub fn introspect_grammar(
|
|||
unique_aliases,
|
||||
})
|
||||
}
|
||||
|
||||
/// Generates symbol IDs and alias IDs for the given parse table and grammars.
|
||||
///
|
||||
/// This function must be called before `render_c_code` to generate the symbol mappings
|
||||
/// that will be used in the generated C code.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `parse_table` - The generated parse table for the language
|
||||
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
|
||||
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
|
||||
/// * `default_aliases` - A map describing the global rename rules that should apply
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A tuple containing:
|
||||
/// * `symbol_ids` - HashMap mapping each Symbol to (C identifier string, numeric ID)
|
||||
/// * `alias_ids` - HashMap mapping each Alias to its C identifier string
|
||||
/// * `unique_aliases` - Sorted vector of unique aliases
|
||||
pub fn generate_symbol_ids(
|
||||
parse_table: &ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
default_aliases: &AliasMap,
|
||||
) -> (
|
||||
HashMap<Symbol, (String, u16)>,
|
||||
HashMap<Alias, String>,
|
||||
Vec<Alias>,
|
||||
) {
|
||||
let mut symbol_ids = HashMap::new();
|
||||
let mut alias_ids = HashMap::new();
|
||||
let mut unique_aliases = Vec::new();
|
||||
let mut symbol_identifiers = HashSet::new();
|
||||
|
||||
// Generate symbol IDs with numeric IDs
|
||||
// Symbol::end() gets 0, then other symbols get 1, 2, 3...
|
||||
let mut numeric_id = 0u16;
|
||||
for &symbol in &parse_table.symbols {
|
||||
assign_symbol_id(
|
||||
symbol,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&mut symbol_ids,
|
||||
&mut symbol_identifiers,
|
||||
numeric_id,
|
||||
);
|
||||
numeric_id += 1;
|
||||
}
|
||||
|
||||
symbol_ids.insert(
|
||||
Symbol::end_of_nonterminal_extra(),
|
||||
symbol_ids[&Symbol::end()].clone(),
|
||||
);
|
||||
|
||||
// Build symbol map to find canonical symbols for aliases
|
||||
let mut symbol_map = HashMap::new();
|
||||
for symbol in &parse_table.symbols {
|
||||
let mut mapping = symbol;
|
||||
|
||||
if let Some(alias) = default_aliases.get(symbol) {
|
||||
let kind = alias.kind();
|
||||
for other_symbol in &parse_table.symbols {
|
||||
if let Some(other_alias) = default_aliases.get(other_symbol) {
|
||||
if other_symbol < mapping && other_alias == alias {
|
||||
mapping = other_symbol;
|
||||
}
|
||||
} else {
|
||||
let (other_name, other_kind) =
|
||||
metadata_for_symbol(*other_symbol, syntax_grammar, lexical_grammar);
|
||||
if (other_name, other_kind) == (alias.value.as_str(), kind) {
|
||||
mapping = other_symbol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if symbol.is_terminal() {
|
||||
let metadata = metadata_for_symbol(*symbol, syntax_grammar, lexical_grammar);
|
||||
for other_symbol in &parse_table.symbols {
|
||||
let other_metadata =
|
||||
metadata_for_symbol(*other_symbol, syntax_grammar, lexical_grammar);
|
||||
if other_metadata == metadata {
|
||||
if let Some(mapped) = symbol_map.get(other_symbol) {
|
||||
if mapped == symbol {
|
||||
break;
|
||||
}
|
||||
}
|
||||
mapping = other_symbol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
symbol_map.insert(*symbol, *mapping);
|
||||
}
|
||||
|
||||
// Generate alias IDs
|
||||
for production_info in &parse_table.production_infos {
|
||||
for alias in &production_info.alias_sequence {
|
||||
if let Some(alias) = &alias {
|
||||
// Find symbols that match this alias
|
||||
let matching_symbols: Vec<Symbol> = parse_table
|
||||
.symbols
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|symbol| {
|
||||
default_aliases.get(symbol).map_or_else(
|
||||
|| {
|
||||
let (name, kind) =
|
||||
metadata_for_symbol(*symbol, syntax_grammar, lexical_grammar);
|
||||
name == alias.value && kind == alias.kind()
|
||||
},
|
||||
|default_alias| default_alias == alias,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Some aliases match an existing symbol in the grammar.
|
||||
let alias_id = if let Some(existing_symbol) = matching_symbols.first() {
|
||||
symbol_ids[&symbol_map[existing_symbol]].0.clone()
|
||||
}
|
||||
// Other aliases don't match any existing symbol, and need their own identifiers.
|
||||
else {
|
||||
if let Err(i) = unique_aliases.binary_search(alias) {
|
||||
unique_aliases.insert(i, alias.clone());
|
||||
}
|
||||
|
||||
if alias.is_named {
|
||||
format!("alias_sym_{}", sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", sanitize_identifier(&alias.value))
|
||||
}
|
||||
};
|
||||
|
||||
alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(symbol_ids, alias_ids, unique_aliases)
|
||||
}
|
||||
|
||||
/// Helper function to sanitize identifiers for C code generation.
|
||||
pub fn sanitize_identifier(name: &str) -> String {
|
||||
let mut result = String::with_capacity(name.len());
|
||||
for c in name.chars() {
|
||||
if c.is_ascii_alphanumeric() || c == '_' {
|
||||
result.push(c);
|
||||
} else {
|
||||
'special_chars: {
|
||||
let replacement = match c {
|
||||
' ' if name.len() == 1 => "SPACE",
|
||||
'~' => "TILDE",
|
||||
'`' => "BQUOTE",
|
||||
'!' => "BANG",
|
||||
'@' => "AT",
|
||||
'#' => "POUND",
|
||||
'$' => "DOLLAR",
|
||||
'%' => "PERCENT",
|
||||
'^' => "CARET",
|
||||
'&' => "AMP",
|
||||
'*' => "STAR",
|
||||
'(' => "LPAREN",
|
||||
')' => "RPAREN",
|
||||
'-' => "DASH",
|
||||
'+' => "PLUS",
|
||||
'=' => "EQ",
|
||||
'{' => "LBRACE",
|
||||
'}' => "RBRACE",
|
||||
'[' => "LBRACK",
|
||||
']' => "RBRACK",
|
||||
'\\' => "BSLASH",
|
||||
'|' => "PIPE",
|
||||
':' => "COLON",
|
||||
';' => "SEMI",
|
||||
'"' => "DQUOTE",
|
||||
'\'' => "SQUOTE",
|
||||
'<' => "LT",
|
||||
'>' => "GT",
|
||||
',' => "COMMA",
|
||||
'.' => "DOT",
|
||||
'?' => "QMARK",
|
||||
'/' => "SLASH",
|
||||
'\n' => "LF",
|
||||
'\r' => "CR",
|
||||
'\t' => "TAB",
|
||||
'\0' => "NULL",
|
||||
'\u{0001}' => "SOH",
|
||||
'\u{0002}' => "STX",
|
||||
'\u{0003}' => "ETX",
|
||||
'\u{0004}' => "EOT",
|
||||
'\u{0005}' => "ENQ",
|
||||
'\u{0006}' => "ACK",
|
||||
'\u{0007}' => "BEL",
|
||||
'\u{0008}' => "BS",
|
||||
'\u{000b}' => "VTAB",
|
||||
'\u{000c}' => "FF",
|
||||
'\u{000e}' => "SO",
|
||||
'\u{000f}' => "SI",
|
||||
'\u{0010}' => "DLE",
|
||||
'\u{0011}' => "DC1",
|
||||
'\u{0012}' => "DC2",
|
||||
'\u{0013}' => "DC3",
|
||||
'\u{0014}' => "DC4",
|
||||
'\u{0015}' => "NAK",
|
||||
'\u{0016}' => "SYN",
|
||||
'\u{0017}' => "ETB",
|
||||
'\u{0018}' => "CAN",
|
||||
'\u{0019}' => "EM",
|
||||
'\u{001a}' => "SUB",
|
||||
'\u{001b}' => "ESC",
|
||||
'\u{001c}' => "FS",
|
||||
'\u{001d}' => "GS",
|
||||
'\u{001e}' => "RS",
|
||||
'\u{001f}' => "US",
|
||||
'\u{007F}' => "DEL",
|
||||
'\u{FEFF}' => "BOM",
|
||||
'\u{0080}'..='\u{FFFF}' => {
|
||||
write!(result, "u{:04x}", c as u32).unwrap();
|
||||
break 'special_chars;
|
||||
}
|
||||
'\u{10000}'..='\u{10FFFF}' => {
|
||||
write!(result, "U{:08x}", c as u32).unwrap();
|
||||
break 'special_chars;
|
||||
}
|
||||
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
|
||||
' ' => break 'special_chars,
|
||||
};
|
||||
if !result.is_empty() && !result.ends_with('_') {
|
||||
result.push('_');
|
||||
}
|
||||
result += replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Helper function to get metadata for a symbol.
|
||||
pub fn metadata_for_symbol<'a>(
|
||||
symbol: Symbol,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
) -> (&'a str, VariableType) {
|
||||
match symbol.kind {
|
||||
SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
|
||||
SymbolType::NonTerminal => {
|
||||
let variable = &syntax_grammar.variables[symbol.index];
|
||||
(&variable.name as &str, variable.kind)
|
||||
}
|
||||
SymbolType::Terminal => {
|
||||
let variable = &lexical_grammar.variables[symbol.index];
|
||||
(&variable.name as &str, variable.kind)
|
||||
}
|
||||
SymbolType::External => {
|
||||
let token = &syntax_grammar.external_tokens[symbol.index];
|
||||
(&token.name as &str, token.kind)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to assign a symbol ID.
|
||||
fn assign_symbol_id(
|
||||
symbol: Symbol,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
symbol_ids: &mut HashMap<Symbol, (String, u16)>,
|
||||
used_identifiers: &mut HashSet<String>,
|
||||
numeric_id: u16,
|
||||
) {
|
||||
let mut id;
|
||||
if symbol == Symbol::end() {
|
||||
id = "ts_builtin_sym_end".to_string();
|
||||
} else {
|
||||
let (name, kind) = metadata_for_symbol(symbol, syntax_grammar, lexical_grammar);
|
||||
id = match kind {
|
||||
VariableType::Auxiliary => format!("aux_sym_{}", sanitize_identifier(name)),
|
||||
VariableType::Anonymous => format!("anon_sym_{}", sanitize_identifier(name)),
|
||||
VariableType::Hidden | VariableType::Named => {
|
||||
format!("sym_{}", sanitize_identifier(name))
|
||||
}
|
||||
};
|
||||
|
||||
let mut suffix_number = 1;
|
||||
let mut suffix = String::new();
|
||||
while used_identifiers.contains(&id) {
|
||||
id.drain(id.len() - suffix.len()..);
|
||||
suffix_number += 1;
|
||||
suffix = suffix_number.to_string();
|
||||
id += &suffix;
|
||||
}
|
||||
}
|
||||
|
||||
used_identifiers.insert(id.clone());
|
||||
symbol_ids.insert(symbol, (id, numeric_id));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
use std::{
|
||||
cmp,
|
||||
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
|
||||
collections::{BTreeMap, BTreeSet, HashMap},
|
||||
fmt::Write,
|
||||
mem::swap,
|
||||
};
|
||||
|
||||
use crate::LANGUAGE_VERSION;
|
||||
use crate::{
|
||||
introspect_grammar::{metadata_for_symbol, sanitize_identifier},
|
||||
LANGUAGE_VERSION,
|
||||
};
|
||||
use indoc::indoc;
|
||||
|
||||
use super::{
|
||||
|
|
@ -245,9 +248,9 @@ impl Generator {
|
|||
}
|
||||
|
||||
if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
format!("alias_sym_{}", sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
format!("anon_alias_sym_{}", sanitize_identifier(&alias.value))
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -1700,10 +1703,7 @@ impl Generator {
|
|||
}
|
||||
|
||||
fn external_token_id(&self, token: &ExternalToken) -> String {
|
||||
format!(
|
||||
"ts_external_token_{}",
|
||||
self.sanitize_identifier(&token.name)
|
||||
)
|
||||
format!("ts_external_token_{}", sanitize_identifier(&token.name))
|
||||
}
|
||||
|
||||
fn field_id(&self, field_name: &str) -> String {
|
||||
|
|
@ -1731,10 +1731,6 @@ impl Generator {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn sanitize_identifier(&self, name: &str) -> String {
|
||||
sanitize_identifier(name)
|
||||
}
|
||||
|
||||
fn sanitize_string(&self, name: &str) -> String {
|
||||
let mut result = String::with_capacity(name.len());
|
||||
for c in name.chars() {
|
||||
|
|
@ -1780,301 +1776,6 @@ impl Generator {
|
|||
}
|
||||
}
|
||||
|
||||
/// Helper function to sanitize identifiers for C code generation.
|
||||
fn sanitize_identifier(name: &str) -> String {
|
||||
let mut result = String::with_capacity(name.len());
|
||||
for c in name.chars() {
|
||||
if c.is_ascii_alphanumeric() || c == '_' {
|
||||
result.push(c);
|
||||
} else {
|
||||
'special_chars: {
|
||||
let replacement = match c {
|
||||
' ' if name.len() == 1 => "SPACE",
|
||||
'~' => "TILDE",
|
||||
'`' => "BQUOTE",
|
||||
'!' => "BANG",
|
||||
'@' => "AT",
|
||||
'#' => "POUND",
|
||||
'$' => "DOLLAR",
|
||||
'%' => "PERCENT",
|
||||
'^' => "CARET",
|
||||
'&' => "AMP",
|
||||
'*' => "STAR",
|
||||
'(' => "LPAREN",
|
||||
')' => "RPAREN",
|
||||
'-' => "DASH",
|
||||
'+' => "PLUS",
|
||||
'=' => "EQ",
|
||||
'{' => "LBRACE",
|
||||
'}' => "RBRACE",
|
||||
'[' => "LBRACK",
|
||||
']' => "RBRACK",
|
||||
'\\' => "BSLASH",
|
||||
'|' => "PIPE",
|
||||
':' => "COLON",
|
||||
';' => "SEMI",
|
||||
'"' => "DQUOTE",
|
||||
'\'' => "SQUOTE",
|
||||
'<' => "LT",
|
||||
'>' => "GT",
|
||||
',' => "COMMA",
|
||||
'.' => "DOT",
|
||||
'?' => "QMARK",
|
||||
'/' => "SLASH",
|
||||
'\n' => "LF",
|
||||
'\r' => "CR",
|
||||
'\t' => "TAB",
|
||||
'\0' => "NULL",
|
||||
'\u{0001}' => "SOH",
|
||||
'\u{0002}' => "STX",
|
||||
'\u{0003}' => "ETX",
|
||||
'\u{0004}' => "EOT",
|
||||
'\u{0005}' => "ENQ",
|
||||
'\u{0006}' => "ACK",
|
||||
'\u{0007}' => "BEL",
|
||||
'\u{0008}' => "BS",
|
||||
'\u{000b}' => "VTAB",
|
||||
'\u{000c}' => "FF",
|
||||
'\u{000e}' => "SO",
|
||||
'\u{000f}' => "SI",
|
||||
'\u{0010}' => "DLE",
|
||||
'\u{0011}' => "DC1",
|
||||
'\u{0012}' => "DC2",
|
||||
'\u{0013}' => "DC3",
|
||||
'\u{0014}' => "DC4",
|
||||
'\u{0015}' => "NAK",
|
||||
'\u{0016}' => "SYN",
|
||||
'\u{0017}' => "ETB",
|
||||
'\u{0018}' => "CAN",
|
||||
'\u{0019}' => "EM",
|
||||
'\u{001a}' => "SUB",
|
||||
'\u{001b}' => "ESC",
|
||||
'\u{001c}' => "FS",
|
||||
'\u{001d}' => "GS",
|
||||
'\u{001e}' => "RS",
|
||||
'\u{001f}' => "US",
|
||||
'\u{007F}' => "DEL",
|
||||
'\u{FEFF}' => "BOM",
|
||||
'\u{0080}'..='\u{FFFF}' => {
|
||||
write!(result, "u{:04x}", c as u32).unwrap();
|
||||
break 'special_chars;
|
||||
}
|
||||
'\u{10000}'..='\u{10FFFF}' => {
|
||||
write!(result, "U{:08x}", c as u32).unwrap();
|
||||
break 'special_chars;
|
||||
}
|
||||
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
|
||||
' ' => break 'special_chars,
|
||||
};
|
||||
if !result.is_empty() && !result.ends_with('_') {
|
||||
result.push('_');
|
||||
}
|
||||
result += replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Helper function to get metadata for a symbol.
|
||||
fn metadata_for_symbol<'a>(
|
||||
symbol: Symbol,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
) -> (&'a str, VariableType) {
|
||||
match symbol.kind {
|
||||
SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
|
||||
SymbolType::NonTerminal => {
|
||||
let variable = &syntax_grammar.variables[symbol.index];
|
||||
(&variable.name as &str, variable.kind)
|
||||
}
|
||||
SymbolType::Terminal => {
|
||||
let variable = &lexical_grammar.variables[symbol.index];
|
||||
(&variable.name as &str, variable.kind)
|
||||
}
|
||||
SymbolType::External => {
|
||||
let token = &syntax_grammar.external_tokens[symbol.index];
|
||||
(&token.name as &str, token.kind)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to assign a symbol ID.
|
||||
fn assign_symbol_id(
|
||||
symbol: Symbol,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
symbol_ids: &mut HashMap<Symbol, (String, u16)>,
|
||||
used_identifiers: &mut HashSet<String>,
|
||||
numeric_id: u16,
|
||||
) {
|
||||
let mut id;
|
||||
if symbol == Symbol::end() {
|
||||
id = "ts_builtin_sym_end".to_string();
|
||||
} else {
|
||||
let (name, kind) = metadata_for_symbol(symbol, syntax_grammar, lexical_grammar);
|
||||
id = match kind {
|
||||
VariableType::Auxiliary => format!("aux_sym_{}", sanitize_identifier(name)),
|
||||
VariableType::Anonymous => format!("anon_sym_{}", sanitize_identifier(name)),
|
||||
VariableType::Hidden | VariableType::Named => {
|
||||
format!("sym_{}", sanitize_identifier(name))
|
||||
}
|
||||
};
|
||||
|
||||
let mut suffix_number = 1;
|
||||
let mut suffix = String::new();
|
||||
while used_identifiers.contains(&id) {
|
||||
id.drain(id.len() - suffix.len()..);
|
||||
suffix_number += 1;
|
||||
suffix = suffix_number.to_string();
|
||||
id += &suffix;
|
||||
}
|
||||
}
|
||||
|
||||
used_identifiers.insert(id.clone());
|
||||
symbol_ids.insert(symbol, (id, numeric_id));
|
||||
}
|
||||
|
||||
/// Generates symbol IDs and alias IDs for the given parse table and grammars.
|
||||
///
|
||||
/// This function must be called before `render_c_code` to generate the symbol mappings
|
||||
/// that will be used in the generated C code.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `parse_table` - The generated parse table for the language
|
||||
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
|
||||
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
|
||||
/// * `default_aliases` - A map describing the global rename rules that should apply
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A tuple containing:
|
||||
/// * `symbol_ids` - HashMap mapping each Symbol to (C identifier string, numeric ID)
|
||||
/// * `alias_ids` - HashMap mapping each Alias to its C identifier string
|
||||
/// * `unique_aliases` - Sorted vector of unique aliases
|
||||
pub fn generate_symbol_ids(
|
||||
parse_table: &ParseTable,
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
default_aliases: &AliasMap,
|
||||
) -> (
|
||||
HashMap<Symbol, (String, u16)>,
|
||||
HashMap<Alias, String>,
|
||||
Vec<Alias>,
|
||||
) {
|
||||
let mut symbol_ids = HashMap::new();
|
||||
let mut alias_ids = HashMap::new();
|
||||
let mut unique_aliases = Vec::new();
|
||||
let mut symbol_identifiers = HashSet::new();
|
||||
|
||||
// Generate symbol IDs with numeric IDs
|
||||
// Symbol::end() gets 0, then other symbols get 1, 2, 3...
|
||||
let mut numeric_id = 0u16;
|
||||
for &symbol in &parse_table.symbols {
|
||||
assign_symbol_id(
|
||||
symbol,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&mut symbol_ids,
|
||||
&mut symbol_identifiers,
|
||||
numeric_id,
|
||||
);
|
||||
numeric_id += 1;
|
||||
}
|
||||
|
||||
symbol_ids.insert(
|
||||
Symbol::end_of_nonterminal_extra(),
|
||||
symbol_ids[&Symbol::end()].clone(),
|
||||
);
|
||||
|
||||
// Build symbol map to find canonical symbols for aliases
|
||||
let mut symbol_map = HashMap::new();
|
||||
for symbol in &parse_table.symbols {
|
||||
let mut mapping = symbol;
|
||||
|
||||
if let Some(alias) = default_aliases.get(symbol) {
|
||||
let kind = alias.kind();
|
||||
for other_symbol in &parse_table.symbols {
|
||||
if let Some(other_alias) = default_aliases.get(other_symbol) {
|
||||
if other_symbol < mapping && other_alias == alias {
|
||||
mapping = other_symbol;
|
||||
}
|
||||
} else {
|
||||
let (other_name, other_kind) =
|
||||
metadata_for_symbol(*other_symbol, syntax_grammar, lexical_grammar);
|
||||
if (other_name, other_kind) == (alias.value.as_str(), kind) {
|
||||
mapping = other_symbol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if symbol.is_terminal() {
|
||||
let metadata = metadata_for_symbol(*symbol, syntax_grammar, lexical_grammar);
|
||||
for other_symbol in &parse_table.symbols {
|
||||
let other_metadata =
|
||||
metadata_for_symbol(*other_symbol, syntax_grammar, lexical_grammar);
|
||||
if other_metadata == metadata {
|
||||
if let Some(mapped) = symbol_map.get(other_symbol) {
|
||||
if mapped == symbol {
|
||||
break;
|
||||
}
|
||||
}
|
||||
mapping = other_symbol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
symbol_map.insert(*symbol, *mapping);
|
||||
}
|
||||
|
||||
// Generate alias IDs
|
||||
for production_info in &parse_table.production_infos {
|
||||
for alias in &production_info.alias_sequence {
|
||||
if let Some(alias) = &alias {
|
||||
// Find symbols that match this alias
|
||||
let matching_symbols: Vec<Symbol> = parse_table
|
||||
.symbols
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|symbol| {
|
||||
default_aliases.get(symbol).map_or_else(
|
||||
|| {
|
||||
let (name, kind) =
|
||||
metadata_for_symbol(*symbol, syntax_grammar, lexical_grammar);
|
||||
name == alias.value && kind == alias.kind()
|
||||
},
|
||||
|default_alias| default_alias == alias,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Some aliases match an existing symbol in the grammar.
|
||||
let alias_id = if let Some(existing_symbol) = matching_symbols.first() {
|
||||
symbol_ids[&symbol_map[existing_symbol]].0.clone()
|
||||
}
|
||||
// Other aliases don't match any existing symbol, and need their own identifiers.
|
||||
else {
|
||||
if let Err(i) = unique_aliases.binary_search(alias) {
|
||||
unique_aliases.insert(i, alias.clone());
|
||||
}
|
||||
|
||||
if alias.is_named {
|
||||
format!("alias_sym_{}", sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", sanitize_identifier(&alias.value))
|
||||
}
|
||||
};
|
||||
|
||||
alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(symbol_ids, alias_ids, unique_aliases)
|
||||
}
|
||||
|
||||
/// Returns a String of C code for the given components of a parser.
|
||||
///
|
||||
/// # Arguments
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue