Implement more C code generation

Max Brunsfeld 2018-12-23 10:16:03 -08:00
parent 99ecf29e4b
commit 5258ee2e6a
6 changed files with 840 additions and 105 deletions

View file

@@ -2,7 +2,7 @@ use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}
use crate::rules::Associativity;
use crate::rules::{Symbol, SymbolType};
use smallbitvec::SmallBitVec;
use std::collections::{HashMap, BTreeMap};
use std::collections::BTreeMap;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::u32;
@@ -178,7 +178,11 @@ impl<'a> ParseItem<'a> {
}
pub fn prev_step(&self) -> Option<&'a ProductionStep> {
self.production.steps.get(self.step_index as usize - 1)
if self.step_index > 0 {
Some(&self.production.steps[self.step_index as usize - 1])
} else {
None
}
}
pub fn is_done(&self) -> bool {
@@ -355,43 +359,49 @@ impl<'a> PartialEq for ParseItem<'a> {
}
}
impl<'a> PartialOrd for ParseItem<'a> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
if let Some(o) = self.variable_index.partial_cmp(&other.variable_index) {
return Some(o);
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
let o = self.variable_index.cmp(&other.variable_index);
if o != Ordering::Equal {
return o;
}
if let Some(o) = self.step_index.partial_cmp(&other.step_index) {
return Some(o);
let o = self.step_index.cmp(&other.step_index);
if o != Ordering::Equal {
return o;
}
if let Some(o) = self.production.dynamic_precedence.partial_cmp(&other.production.dynamic_precedence) {
return Some(o);
let o = self.production.dynamic_precedence.cmp(&other.production.dynamic_precedence);
if o != Ordering::Equal {
return o;
}
if let Some(o) = self.production.steps.len().partial_cmp(&other.production.steps.len()) {
return Some(o);
let o = self.production.steps.len().cmp(&other.production.steps.len());
if o != Ordering::Equal {
return o;
}
if let Some(o) = self.precedence().partial_cmp(&other.precedence()) {
return Some(o);
let o = self.precedence().cmp(&other.precedence());
if o != Ordering::Equal {
return o;
}
if let Some(o) = self.associativity().partial_cmp(&other.associativity()) {
return Some(o);
let o = self.associativity().cmp(&other.associativity());
if o != Ordering::Equal {
return o;
}
for (i, step) in self.production.steps.iter().enumerate() {
let cmp = if i < self.step_index as usize {
step.alias.partial_cmp(&other.production.steps[i].alias)
let o = if i < self.step_index as usize {
step.alias.cmp(&other.production.steps[i].alias)
} else {
step.partial_cmp(&other.production.steps[i])
step.cmp(&other.production.steps[i])
};
if let Some(o) = cmp {
return Some(o);
if o != Ordering::Equal {
return o;
}
}
return None;
return Ordering::Equal;
}
}
impl<'a> Ord for ParseItem<'a> {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap_or(Ordering::Equal)
impl<'a> PartialOrd for ParseItem<'a> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
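
The hunk above moves the real comparison logic into Ord::cmp and leaves PartialOrd as a thin wrapper returning Some(self.cmp(other)), so the ordering becomes total instead of occasionally yielding None. A minimal sketch of that pattern on a hypothetical two-field type (not the actual ParseItem, whose tie-breaking also covers dynamic precedence, associativity, and step aliases):

    use std::cmp::Ordering;

    // Hypothetical stand-in for ParseItem, for illustration only.
    #[derive(PartialEq, Eq)]
    struct Item {
        variable_index: u32,
        step_index: u32,
    }

    impl Ord for Item {
        fn cmp(&self, other: &Self) -> Ordering {
            // Compare field by field; later fields only break ties.
            self.variable_index
                .cmp(&other.variable_index)
                .then_with(|| self.step_index.cmp(&other.step_index))
        }
    }

    impl PartialOrd for Item {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            // Delegate to the total ordering defined above.
            Some(self.cmp(other))
        }
    }

    fn main() {
        let a = Item { variable_index: 1, step_index: 0 };
        let b = Item { variable_index: 1, step_index: 2 };
        assert!(a < b);
    }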

View file

@@ -198,15 +198,16 @@ impl<'a> ParseItemSetBuilder<'a> {
if syntax_grammar.variables_to_inline.contains(&non_terminal) {
continue;
}
for (production_index, production) in variable.productions.iter().enumerate() {
for production in &variable.productions {
let item = ParseItem {
variable_index,
production,
step_index: 0,
};
// let step_id = item.as_step_id(syntax_grammar, inlines);
if let Some(inlined_productions) = inlines.inlined_productions(item.production, item.step_index) {
if let Some(inlined_productions) =
inlines.inlined_productions(item.production, item.step_index)
{
for production in inlined_productions {
find_or_push(
additions_for_non_terminal,
@@ -244,16 +245,21 @@ impl<'a> ParseItemSetBuilder<'a> {
) -> ParseItemSet<'a> {
let mut result = ParseItemSet::default();
for (item, lookaheads) in &item_set.entries {
if let Some(productions) = inlines.inlined_productions(item.production, item.step_index) {
if let Some(productions) = inlines.inlined_productions(item.production, item.step_index)
{
for production in productions {
self.add_item(&mut result, ParseItem {
variable_index: item.variable_index,
production,
step_index: item.step_index,
}, lookaheads, grammar);
self.add_item(
&mut result,
ParseItem {
variable_index: item.variable_index,
production,
step_index: item.step_index,
},
lookaheads,
);
}
} else {
self.add_item(&mut result, *item, lookaheads, grammar);
self.add_item(&mut result, *item, lookaheads);
}
}
result
@@ -268,7 +274,6 @@ impl<'a> ParseItemSetBuilder<'a> {
set: &mut ParseItemSet<'a>,
item: ParseItem<'a>,
lookaheads: &LookaheadSet,
grammar: &SyntaxGrammar,
) {
if let Some(step) = item.step() {
if step.symbol.is_non_terminal() {

View file

@@ -0,0 +1,24 @@
use crate::rules::Symbol;
use crate::tables::LexTable;
use crate::grammars::{SyntaxGrammar, LexicalGrammar};
pub(crate) struct LexTableBuilder<'a> {
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
table: LexTable,
}
impl<'a> LexTableBuilder<'a> {
pub fn new(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
) -> Self {
Self {
syntax_grammar, lexical_grammar, table: LexTable::default()
}
}
pub fn build(self) -> (LexTable, LexTable, Option<Symbol>) {
(LexTable::default(), LexTable::default(), None)
}
}

View file

@@ -1,10 +1,13 @@
mod item;
mod item_set_builder;
mod lex_table_builder;
use self::item::{LookaheadSet, ParseItem, ParseItemSet};
use self::item_set_builder::ParseItemSetBuilder;
use self::lex_table_builder::LexTableBuilder;
use crate::error::{Error, Result};
use crate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::rules::Alias;
use crate::rules::{AliasMap, Associativity, Symbol, SymbolType};
use crate::tables::{
AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
@@ -43,7 +46,7 @@ struct ParseTableBuilder<'a> {
impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
// Ensure that the empty rename sequence has index 0.
// Ensure that the empty alias sequence has index 0.
self.parse_table.alias_sequences.push(Vec::new());
// Ensure that the error state has index 0.
@@ -61,9 +64,18 @@ impl<'a> ParseTableBuilder<'a> {
);
self.process_part_state_queue()?;
let lex_table_builder = LexTableBuilder::new(self.syntax_grammar, self.lexical_grammar);
self.populate_used_symbols();
Err(Error::grammar("oh no"))
let (main_lex_table, keyword_lex_table, keyword_capture_token) = lex_table_builder.build();
Ok((
self.parse_table,
main_lex_table,
keyword_lex_table,
keyword_capture_token,
))
}
fn add_parse_state(
@@ -82,6 +94,7 @@ impl<'a> ParseTableBuilder<'a> {
let state_id = self.parse_table.states.len();
self.item_sets_by_state_id.push(v.key().clone());
self.parse_table.states.push(ParseState {
lex_state_id: 0,
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
});
@@ -98,12 +111,16 @@ impl<'a> ParseTableBuilder<'a> {
fn process_part_state_queue(&mut self) -> Result<()> {
while let Some(entry) = self.parse_state_queue.pop_front() {
println!(
"ITEM SET {}:\n{}",
entry.state_id,
self.item_sets_by_state_id[entry.state_id]
.display_with(&self.syntax_grammar, &self.lexical_grammar,)
);
let debug = false;
if debug {
println!(
"ITEM SET {}:\n{}",
entry.state_id,
self.item_sets_by_state_id[entry.state_id]
.display_with(&self.syntax_grammar, &self.lexical_grammar,)
);
}
let item_set = self.item_set_builder.transitive_closure(
&self.item_sets_by_state_id[entry.state_id],
@@ -111,11 +128,12 @@ impl<'a> ParseTableBuilder<'a> {
self.inlines,
);
// println!("TRANSITIVE CLOSURE:");
// for item in item_set.entries.keys() {
// println!("{}", item.display_with(&self.syntax_grammar, &self.lexical_grammar, &self.item_set_builder.inlines));
// }
// println!("");
if debug {
println!(
"TRANSITIVE CLOSURE:\n{}",
item_set.display_with(&self.syntax_grammar, &self.lexical_grammar)
);
}
self.add_actions(
entry.preceding_symbols,
@@ -249,6 +267,17 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
let state = &mut self.parse_table.states[state_id];
for extra_token in &self.syntax_grammar.extra_tokens {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
}
Ok(())
}
@@ -514,6 +543,7 @@ impl<'a> ParseTableBuilder<'a> {
non_terminal_usages[symbol.index] = true;
}
}
self.parse_table.symbols.push(Symbol::end());
for (i, value) in terminal_usages.into_iter().enumerate() {
if value {
self.parse_table.symbols.push(Symbol::terminal(i));
@@ -532,12 +562,15 @@ impl<'a> ParseTableBuilder<'a> {
}
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
let alias_sequence = item
let mut alias_sequence: Vec<Option<Alias>> = item
.production
.steps
.iter()
.map(|s| s.alias.clone())
.collect();
while alias_sequence.last() == Some(&None) {
alias_sequence.pop();
}
if let Some(index) = self
.parse_table
.alias_sequences
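
A detail worth noting in the hunk above: get_alias_sequence_id now trims trailing None entries before searching for an existing sequence, so productions whose final steps carry no aliases can share a single alias sequence entry. A self-contained illustration of that trimming step, with the Alias type stood in by a plain string slice for brevity:

    // Trim trailing Nones so sequences differing only in un-aliased
    // trailing steps compare equal (mirrors the loop in the hunk above).
    fn trim_trailing_nones(mut seq: Vec<Option<&'static str>>) -> Vec<Option<&'static str>> {
        while seq.last() == Some(&None) {
            seq.pop();
        }
        seq
    }

    fn main() {
        let sequence = vec![Some("body"), None, Some("name"), None, None];
        assert_eq!(
            trim_trailing_nones(sequence),
            vec![Some("body"), None, Some("name")]
        );
    }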

View file

@@ -1,8 +1,16 @@
use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use crate::nfa::CharacterSet;
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::tables::{LexTable, ParseTable, ParseTableEntry};
use crate::tables::{LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use std::collections::{HashMap, HashSet};
use std::fmt::Write;
use std::mem::swap;
macro_rules! add {
($this: tt, $($arg: tt)*) => {{
$this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
}}
}
macro_rules! add_line {
($this: tt, $($arg: tt)*) => {
@@ -14,10 +22,21 @@ macro_rules! add_line {
}
}
macro_rules! indent {
($this: tt) => {
$this.indent_level += 1;
};
}
macro_rules! dedent {
($this: tt) => {
$this.indent_level -= 1;
};
}
struct Generator {
buffer: String,
indent_level: usize,
language_name: String,
parse_table: ParseTable,
main_lex_table: LexTable,
@@ -27,9 +46,9 @@ struct Generator {
lexical_grammar: LexicalGrammar,
simple_aliases: AliasMap,
symbol_ids: HashMap<Symbol, String>,
parse_table_entries: Vec<(usize, ParseTableEntry)>,
next_parse_action_list_index: usize,
unique_aliases: HashSet<Alias>,
alias_ids: HashMap<Alias, String>,
external_scanner_states: Vec<HashSet<usize>>,
alias_map: HashMap<Alias, Option<Symbol>>,
}
impl Generator {
@@ -39,6 +58,30 @@ impl Generator {
self.add_stats();
self.add_symbol_enum();
self.add_symbol_names_list();
self.add_symbol_metadata_list();
self.add_alias_sequences();
let mut main_lex_table = LexTable::default();
swap(&mut main_lex_table, &mut self.main_lex_table);
self.add_lex_function("ts_lex", main_lex_table);
if self.keyword_capture_token.is_some() {
let mut keyword_lex_table = LexTable::default();
swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
self.add_lex_function("ts_lex_keywords", keyword_lex_table);
}
self.add_lex_modes_list();
if !self.syntax_grammar.external_tokens.is_empty() {
self.add_external_token_enum();
self.add_external_scanner_symbol_map();
self.add_external_scanner_states_list();
}
self.add_parse_table();
self.add_parser_export();
self.buffer
}
@@ -50,7 +93,10 @@ impl Generator {
fn add_pragmas(&mut self) {
add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
add_line!(self, "#pragma GCC diagnostic push");
add_line!(self, "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
add_line!(
self,
"#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
);
add_line!(self, "#endif");
add_line!(self, "");
@@ -67,81 +113,639 @@ impl Generator {
}
fn add_stats(&mut self) {
let mut token_count = 0;
for symbol in &self.parse_table.symbols {
if symbol.is_terminal() {
token_count += 1;
} else if symbol.is_external() {
let external_token = &self.syntax_grammar.external_tokens[symbol.index];
if external_token.corresponding_internal_token.is_none() {
token_count += 1;
let token_count = self
.parse_table
.symbols
.iter()
.filter(|symbol| {
if symbol.is_terminal() {
true
} else if symbol.is_external() {
self.syntax_grammar.external_tokens[symbol.index]
.corresponding_internal_token
.is_none()
} else {
false
}
}
})
.count();
let mut symbol_identifiers = HashSet::new();
for i in 0..self.parse_table.symbols.len() {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
for alias_sequence in &self.parse_table.alias_sequences {
for entry in alias_sequence {
if let Some(alias) = entry {
self.unique_aliases.insert(alias.clone());
let alias_kind = if alias.is_named {
VariableType::Named
} else {
VariableType::Anonymous
};
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
self.alias_map
.entry(alias.clone())
.or_insert(matching_symbol);
}
}
}
let mut symbol_id_values = HashSet::new();
for i in 0..self.parse_table.symbols.len() {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_id_values);
}
add_line!(self, "#define LANGUAGE_VERSION {}", 6);
add_line!(self, "#define STATE_COUNT {}", self.parse_table.states.len());
add_line!(self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len());
add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
add_line!(
self,
"#define STATE_COUNT {}",
self.parse_table.states.len()
);
add_line!(
self,
"#define SYMBOL_COUNT {}",
self.parse_table.symbols.len()
);
add_line!(
self,
"#define ALIAS_COUNT {}",
self.alias_map.iter().filter(|e| e.1.is_none()).count()
);
add_line!(self, "#define TOKEN_COUNT {}", token_count);
add_line!(self, "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len());
// add_line!(self, "#define MAX_ALIAS_SEQUENCE_LENGTH {}\n", self.parse_table.max_alias_sequence_length);
add_line!(
self,
"#define EXTERNAL_TOKEN_COUNT {}",
self.syntax_grammar.external_tokens.len()
);
if let Some(max_alias_sequence_length) = self
.parse_table
.alias_sequences
.iter()
.map(|seq| seq.len())
.max()
{
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
max_alias_sequence_length
);
}
add_line!(self, "");
}
fn add_symbol_enum(&mut self) {
add_line!(self, "enum {{");
self.indent();
for i in 0..self.parse_table.symbols.len() {
let symbol = self.parse_table.symbols[i];
if symbol != Symbol::end() {
add_line!(self, "{} = {}", self.symbol_ids[&symbol], i);
indent!(self);
let mut i = 1;
for symbol in self.parse_table.symbols.iter() {
if *symbol != Symbol::end() {
add_line!(self, "{} = {},", self.symbol_ids[&symbol], i);
i += 1;
}
}
self.dedent();
for (alias, symbol) in &self.alias_map {
if symbol.is_none() {
add_line!(self, "{} = {},", self.alias_ids[&alias], i);
}
i += 1;
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_symbol_names_list(&mut self) {
add_line!(self, "static const char *ts_symbol_names[] = {{");
self.indent();
self.dedent();
indent!(self);
for symbol in self.parse_table.symbols.iter() {
if *symbol != Symbol::end() {
add_line!(
self,
"[{}] = \"{}\",",
self.symbol_ids[&symbol],
self.sanitize_string(self.metadata_for_symbol(*symbol).0)
);
}
}
for (alias, symbol) in &self.alias_map {
if symbol.is_none() {
add_line!(
self,
"[{}] = \"{}\",",
self.alias_ids[&alias],
self.sanitize_string(&alias.value)
);
}
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn assign_symbol_id(&mut self, symbol: Symbol, used_ids: &mut HashSet<String>) {
fn add_symbol_metadata_list(&mut self) {
add_line!(
self,
"static const TSSymbolMetadata ts_symbol_metadata[] = {{"
);
indent!(self);
for symbol in &self.parse_table.symbols {
add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
indent!(self);
match self.metadata_for_symbol(*symbol).1 {
VariableType::Named => {
add_line!(self, ".visible = true,");
add_line!(self, ".named = true,");
}
VariableType::Anonymous => {
add_line!(self, ".visible = true,");
add_line!(self, ".named = false,");
}
VariableType::Hidden => {
add_line!(self, ".visible = false,");
add_line!(self, ".named = true,");
}
VariableType::Auxiliary => {
add_line!(self, ".visible = false,");
add_line!(self, ".named = false,");
}
}
dedent!(self);
add_line!(self, "}},");
}
for (alias, matching_symbol) in &self.alias_map {
if matching_symbol.is_none() {
add_line!(self, "[{}] = {{", self.alias_ids[&alias]);
indent!(self);
add_line!(self, ".visible = true,");
add_line!(self, ".named = {},", alias.is_named);
dedent!(self);
add_line!(self, "}},");
}
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_alias_sequences(&mut self) {
add_line!(
self,
"static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
self.parse_table.alias_sequences.len()
);
indent!(self);
for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) {
add_line!(self, "[{}] = {{", i);
indent!(self);
for (j, alias) in sequence.iter().enumerate() {
if let Some(alias) = alias {
add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
}
}
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
add_line!(
self,
"static bool {}(TSLexer *lexer, TSStateId state) {{",
name
);
indent!(self);
add_line!(self, "START_LEXER();");
add_line!(self, "switch (state) {{");
indent!(self);
for (i, state) in lex_table.states.into_iter().enumerate() {
add_line!(self, "case {}:", i);
indent!(self);
self.add_lex_state(state);
dedent!(self);
}
add_line!(self, "default:");
indent!(self);
add_line!(self, "return false;");
dedent!(self);
dedent!(self);
add_line!(self, "}}");
dedent!(self);
add_line!(self, "}}");
add_line!(self, "");
}
fn add_lex_state(&mut self, state: LexState) {
if let Some(accept_action) = state.accept_action {
add_line!(
self,
"ACCEPT_TOKEN({})",
self.symbol_ids[&accept_action.symbol]
);
}
let mut ruled_out_characters = HashSet::new();
for (characters, action) in state.advance_actions {
let previous_length = self.buffer.len();
add!(self, "if (");
if self.add_character_set_condition(&characters, &ruled_out_characters) {
add!(self, ")");
indent!(self);
if action.in_main_token {
add_line!(self, "ADVANCE({});", action.state);
} else {
add_line!(self, "SKIP({});", action.state);
}
if let CharacterSet::Include(chars) = characters {
ruled_out_characters.extend(chars.iter());
}
dedent!(self);
} else {
self.buffer.truncate(previous_length);
}
}
add_line!(self, "END_STATE();");
}
fn add_character_set_condition(
&mut self,
characters: &CharacterSet,
ruled_out_characters: &HashSet<char>,
) -> bool {
true
}
fn add_lex_modes_list(&mut self) {
self.get_external_scanner_state_id(HashSet::new());
let mut external_tokens_by_corresponding_internal_token = HashMap::new();
for (i, external_token) in self.syntax_grammar.external_tokens.iter().enumerate() {
if let Some(symbol) = external_token.corresponding_internal_token {
external_tokens_by_corresponding_internal_token.insert(symbol.index, i);
}
}
add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{");
indent!(self);
for i in 0..self.parse_table.states.len() {
let mut external_tokens = HashSet::new();
for token in self.parse_table.states[i].terminal_entries.keys() {
if token.is_external() {
external_tokens.insert(token.index);
} else if token.is_terminal() {
if let Some(external_index) =
external_tokens_by_corresponding_internal_token.get(&token.index)
{
external_tokens.insert(*external_index);
}
}
}
let external_state_id = self.get_external_scanner_state_id(external_tokens);
let state = &self.parse_table.states[i];
if external_state_id > 0 {
add_line!(
self,
"[{}] = {{.lex_state = {}, .external_lex_state = {}}},",
i,
state.lex_state_id,
external_state_id
);
} else {
add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id);
}
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_external_token_enum(&mut self) {
add_line!(self, "enum {{");
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
add_line!(
self,
"{} = {},",
self.external_token_id(&self.syntax_grammar.external_tokens[i]),
i
);
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_external_scanner_symbol_map(&mut self) {
add_line!(
self,
"static TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
);
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
add_line!(
self,
"[{}] = {},",
self.external_token_id(&self.syntax_grammar.external_tokens[i]),
self.symbol_ids[&Symbol::external(i)],
);
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_external_scanner_states_list(&mut self) {
add_line!(
self,
"static bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
self.external_scanner_states.len(),
);
indent!(self);
for i in 0..self.external_scanner_states.len() {
if !self.external_scanner_states[i].is_empty() {
add_line!(self, "[{}] = {{", i);
indent!(self);
for token_index in &self.external_scanner_states[i] {
add_line!(
self,
"[{}] = true,",
self.external_token_id(&self.syntax_grammar.external_tokens[*token_index])
);
}
dedent!(self);
add_line!(self, "}},");
}
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_parse_table(&mut self) {
let mut parse_table_entries = Vec::new();
let mut next_parse_action_list_index = 0;
self.get_parse_action_list_id(
&ParseTableEntry {
actions: Vec::new(),
reusable: false,
},
&mut parse_table_entries,
&mut next_parse_action_list_index,
);
add_line!(
self,
"static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {{"
);
indent!(self);
for (i, state) in self.parse_table.states.iter().enumerate() {
add_line!(self, "[{}] = {{", i);
indent!(self);
for (symbol, state_id) in &state.nonterminal_entries {
add_line!(self, "[{}] = STATE({}),", self.symbol_ids[symbol], state_id);
}
for (symbol, entry) in &state.terminal_entries {
let entry_id = self.get_parse_action_list_id(
entry,
&mut parse_table_entries,
&mut next_parse_action_list_index,
);
add_line!(
self,
"[{}] = ACTIONS({}),",
self.symbol_ids[symbol],
entry_id
);
}
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
self.add_parse_action_list(parse_table_entries);
}
fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
add_line!(self, "static TSParseActionEntry ts_parse_actions[] = {{");
indent!(self);
for (i, entry) in parse_table_entries {
add!(
self,
" [{}] = {{.count = {}, .reusable = {}}},",
i,
entry.actions.len(),
entry.reusable
);
for action in entry.actions {
add!(self, " ");
match action {
ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
ParseAction::Recover => add!(self, "RECOVER()"),
ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
ParseAction::Shift {
state,
is_repetition,
} => {
if is_repetition {
add!(self, "SHIFT_REPEAT({})", state);
} else {
add!(self, "SHIFT({})", state);
}
}
ParseAction::Reduce {
symbol,
child_count,
dynamic_precedence,
alias_sequence_id,
..
} => {
if !self.symbol_ids.contains_key(&symbol) {
eprintln!(
"SYMBOL: {:?} {:?}",
symbol,
self.metadata_for_symbol(symbol)
);
}
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
if dynamic_precedence != 0 {
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
}
if alias_sequence_id != 0 {
add!(self, ", .alias_sequence_id = {}", alias_sequence_id);
}
add!(self, ")");
}
}
add!(self, ",")
}
add!(self, "\n");
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_parser_export(&mut self) {
let language_function_name = format!("tree_sitter_{}", self.language_name);
let external_scanner_name = format!("{}_external_scanner", language_function_name);
if !self.syntax_grammar.external_tokens.is_empty() {
add_line!(self, "void *{}_create();", external_scanner_name);
add_line!(self, "void {}_destroy(void *);", external_scanner_name);
add_line!(
self,
"bool {}_scan(void *, TSLexer *, const bool *);",
external_scanner_name
);
add_line!(
self,
"unsigned {}_serialize(void *, char *);",
external_scanner_name
);
add_line!(
self,
"void {}_deserialize(void *, const char *, unsigned);",
external_scanner_name
);
add_line!(self, "");
}
add_line!(self, "#ifdef _WIN32");
add_line!(self, "#define extern __declspec(dllexport)");
add_line!(self, "#endif");
add_line!(self, "");
add_line!(
self,
"extern const TSLanguage *{}() {{",
language_function_name
);
indent!(self);
add_line!(self, "static TSLanguage language = {{");
indent!(self);
add_line!(self, ".version = LANGUAGE_VERSION,");
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
add_line!(self, ".alias_count = ALIAS_COUNT,");
add_line!(self, ".token_count = TOKEN_COUNT,");
add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
add_line!(
self,
".parse_table = (const unsigned short *)ts_parse_table,"
);
add_line!(self, ".parse_actions = ts_parse_actions,");
add_line!(self, ".lex_modes = ts_lex_modes,");
add_line!(self, ".symbol_names = ts_symbol_names,");
add_line!(
self,
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
);
add_line!(
self,
".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
);
add_line!(self, ".lex_fn = ts_lex,");
if let Some(keyword_capture_token) = self.keyword_capture_token {
add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
add_line!(
self,
".keyword_capture_token = {},",
self.symbol_ids[&keyword_capture_token]
);
}
add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");
if !self.syntax_grammar.external_tokens.is_empty() {
add_line!(self, ".external_scanner = {{");
indent!(self);
add_line!(self, "(const bool *)ts_external_scanner_states,");
add_line!(self, "ts_external_scanner_symbol_map,");
add_line!(self, "{}_create,", external_scanner_name);
add_line!(self, "{}_destroy,", external_scanner_name);
add_line!(self, "{}_scan,", external_scanner_name);
add_line!(self, "{}_serialize,", external_scanner_name);
add_line!(self, "{}_deserialize,", external_scanner_name);
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "return &language;");
dedent!(self);
add_line!(self, "}}");
}
fn get_parse_action_list_id(
&self,
entry: &ParseTableEntry,
parse_table_entries: &mut Vec<(usize, ParseTableEntry)>,
next_parse_action_list_index: &mut usize,
) -> usize {
if let Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) {
return *index;
}
let result = *next_parse_action_list_index;
parse_table_entries.push((result, entry.clone()));
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
fn get_external_scanner_state_id(&mut self, external_tokens: HashSet<usize>) -> usize {
self.external_scanner_states
.iter()
.position(|tokens| *tokens == external_tokens)
.unwrap_or_else(|| {
self.external_scanner_states.push(external_tokens);
self.external_scanner_states.len() - 1
})
}
fn external_token_id(&self, token: &ExternalToken) -> String {
format!(
"ts_external_token_{}",
self.sanitize_identifier(&token.name)
)
}
fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
let mut id;
if symbol == Symbol::end() {
id = "ts_builtin_sym_end".to_string();
} else {
let (name, kind) = self.metadata_for_symbol(symbol);
id = match kind {
VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_name(name)),
VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_name(name)),
VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
VariableType::Hidden | VariableType::Named => {
format!("sym_{}", self.sanitize_name(name))
format!("sym_{}", self.sanitize_identifier(name))
}
};
let mut suffix_number = 1;
let mut suffix = String::new();
while used_ids.contains(&id) {
while used_identifiers.contains(&id) {
id.drain(id.len() - suffix.len()..);
suffix_number += 1;
suffix = suffix_number.to_string();
@@ -149,7 +753,7 @@ impl Generator {
}
}
used_ids.insert(id.clone());
used_identifiers.insert(id.clone());
self.symbol_ids.insert(symbol, id);
}
@@ -171,16 +775,67 @@ impl Generator {
}
}
fn sanitize_name(&self, name: &str) -> String {
name.to_string()
fn sanitize_identifier(&self, name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if ('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| c == '_'
{
result.push(c);
} else {
result += match c {
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
_ => continue,
}
}
}
result
}
fn indent(&mut self) {
self.indent_level += 1;
}
fn dedent(&mut self) {
self.indent_level -= 1;
fn sanitize_string(&self, name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if ['\\', '\n', '\r', '\"'].contains(&c) {
result.push('\\');
}
result.push(c);
}
result
}
}
@@ -206,9 +861,9 @@ pub(crate) fn render_c_code(
lexical_grammar,
simple_aliases,
symbol_ids: HashMap::new(),
parse_table_entries: Vec::new(),
next_parse_action_list_index: 0,
unique_aliases: HashSet::new(),
alias_ids: HashMap::new(),
external_scanner_states: Vec::new(),
alias_map: HashMap::new(),
}
.generate()
}
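
The code generation above is driven by a handful of macros (add!, add_line!, indent!, dedent!) that append formatted text to the Generator's buffer at the current indentation level. The standalone sketch below imitates that emission style; it is an illustration rather than the actual tree-sitter CLI macros, and it assumes add_line! writes two spaces per indent level, then the formatted text, then a newline:

    use std::fmt::Write;

    // Assumption: indentation is two spaces per level; the real macro body
    // is not fully shown in the diff above.
    macro_rules! add_line {
        ($this:tt, $($arg:tt)*) => {{
            for _ in 0..$this.indent_level {
                $this.buffer.push_str("  ");
            }
            $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
            $this.buffer.push('\n');
        }};
    }

    macro_rules! indent {
        ($this:tt) => {
            $this.indent_level += 1;
        };
    }

    macro_rules! dedent {
        ($this:tt) => {
            $this.indent_level -= 1;
        };
    }

    // Hypothetical stand-in for the Generator struct.
    struct Emitter {
        buffer: String,
        indent_level: usize,
    }

    fn main() {
        let mut e = Emitter { buffer: String::new(), indent_level: 0 };
        add_line!(e, "enum {{");
        indent!(e);
        add_line!(e, "{} = {},", "sym_identifier", 1);
        dedent!(e);
        add_line!(e, "}};");
        print!("{}", e.buffer);
    }

Running the sketch prints a three-line C enum, the same overall shape the generator's add_symbol_enum produces.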

View file

@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::ops::Range;
use crate::rules::{Associativity, Symbol, Alias};
use crate::nfa::CharacterSet;
pub(crate) type AliasSequenceId = usize;
pub(crate) type ParseStateId = usize;
@@ -34,7 +35,8 @@ pub(crate) struct ParseTableEntry {
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseState {
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
pub nonterminal_entries: HashMap<Symbol, ParseStateId>
pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
pub lex_state_id: usize,
}
#[derive(Debug, PartialEq, Eq)]
@@ -60,7 +62,7 @@ pub(crate) struct AcceptTokenAction {
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct LexState {
pub advance_actions: HashMap<Symbol, AdvanceAction>,
pub advance_actions: HashMap<CharacterSet, AdvanceAction>,
pub accept_action: Option<AcceptTokenAction>,
}
@@ -78,6 +80,12 @@ impl ParseTableEntry {
}
}
impl Default for LexTable {
fn default() -> Self {
LexTable { states: Vec::new() }
}
}
impl ParseAction {
pub fn precedence(&self) -> i32 {
if let ParseAction::Reduce { precedence, .. } = self {