2022-01-17 14:45:07 -08:00
|
|
|
use super::{
|
|
|
|
|
char_tree::{CharacterTree, Comparator},
|
|
|
|
|
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
|
|
|
|
|
rules::{Alias, AliasMap, Symbol, SymbolType},
|
|
|
|
|
tables::{
|
|
|
|
|
AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
|
|
|
|
|
ParseTableEntry,
|
|
|
|
|
},
|
2019-02-07 17:18:33 -08:00
|
|
|
};
|
2019-01-02 12:34:40 -08:00
|
|
|
use core::ops::Range;
|
2022-01-17 14:45:07 -08:00
|
|
|
use std::{
|
|
|
|
|
cmp,
|
|
|
|
|
collections::{HashMap, HashSet},
|
|
|
|
|
fmt::Write,
|
|
|
|
|
mem::swap,
|
|
|
|
|
};
|
2019-08-28 17:14:04 -07:00
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
// Transitions whose character set simplifies to more than this many ranges
// are candidates for extraction into shared helper functions.
const LARGE_CHARACTER_RANGE_COUNT: usize = 8;

// Parse states with more than this many entries (capped at half the symbol
// count) use the full array representation; the rest use the compact
// "small state" encoding.
const SMALL_STATE_THRESHOLD: usize = 64;

// The range of tree-sitter ABI versions that this generator can target.
const ABI_VERSION_MIN: usize = 13;

const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;

// First ABI version whose parsers include the primary-state-id table.
const ABI_VERSION_WITH_PRIMARY_STATES: usize = 14;
|
2020-05-26 13:39:11 -07:00
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
// Append formatted text to the generator's output buffer, with no leading
// indentation and no trailing newline. The `write_fmt` on a `String` cannot
// fail, so the `unwrap` never panics.
macro_rules! add {
    ($this: tt, $($arg: tt)*) => {{
        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
    }}
}
|
2018-12-20 13:36:39 -08:00
|
|
|
|
2019-01-02 12:34:40 -08:00
|
|
|
// Append the current indentation (two spaces per indent level) to the
// output buffer.
macro_rules! add_whitespace {
    ($this: tt) => {{
        for _ in 0..$this.indent_level {
            write!(&mut $this.buffer, "  ").unwrap();
        }
    }};
}
|
|
|
|
|
|
|
|
|
|
// Append a full line to the output buffer: indentation, formatted text,
// then a newline.
macro_rules! add_line {
    ($this: tt, $($arg: tt)*) => {
        add_whitespace!($this);
        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
        $this.buffer += "\n";
    }
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
// Increase the generator's indentation level by one.
macro_rules! indent {
    ($this: tt) => {
        $this.indent_level += 1;
    };
}
|
|
|
|
|
|
|
|
|
|
// Decrease the generator's indentation level by one.
// Panics if the level is already zero, which would indicate unbalanced
// indent!/dedent! calls in the emission code.
macro_rules! dedent {
    ($this: tt) => {
        assert_ne!($this.indent_level, 0);
        $this.indent_level -= 1;
    };
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
// Renders a grammar's tables into C source text. The `add_*` methods append
// to `buffer`, which `generate` ultimately returns.
struct Generator {
    // Accumulated C source output.
    buffer: String,
    // Current indentation depth, driven by the indent!/dedent! macros.
    indent_level: usize,
    language_name: String,
    parse_table: ParseTable,
    main_lex_table: LexTable,
    // Separate lex table used for keyword extraction, when enabled.
    keyword_lex_table: LexTable,
    // Number of leading parse states emitted with the full array
    // representation; the remaining states use the "small state" encoding.
    large_state_count: usize,
    keyword_capture_token: Option<Symbol>,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    default_aliases: AliasMap,
    // Position of each symbol in the generated symbol enum.
    symbol_order: HashMap<Symbol, usize>,
    // C identifier chosen for each symbol.
    symbol_ids: HashMap<Symbol, String>,
    // C identifier chosen for each alias.
    alias_ids: HashMap<Alias, String>,
    // Sorted aliases that do not correspond to any existing symbol and
    // therefore need their own enum entries.
    unique_aliases: Vec<Alias>,
    // Maps each symbol to the public-facing symbol it is presented as.
    symbol_map: HashMap<Symbol, Symbol>,
    // Sorted list of all field names used in the grammar.
    field_names: Vec<String>,

    // Target ABI version for the generated parser.
    #[allow(unused)]
    abi_version: usize,
}
|
|
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
// Precomputed summary of one lex-state transition, used when rendering the
// body of a lex function.
struct TransitionSummary {
    // False when the transition matches the *complement* of `ranges`.
    is_included: bool,
    // Character ranges that this transition must check.
    ranges: Vec<Range<char>>,
    // Index of a shared large-character-set helper function, if one applies.
    call_id: Option<usize>,
}
|
|
|
|
|
|
2020-05-26 16:37:45 -07:00
|
|
|
// A character set large enough to be extracted into its own helper function
// in the generated C code.
struct LargeCharacterSetInfo {
    ranges: Vec<Range<char>>,
    // Symbol whose lexing uses this set; used to name the helper function.
    symbol: Symbol,
    // Disambiguates multiple helpers generated for the same symbol.
    index: usize,
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
impl Generator {
|
|
|
|
|
/// Render the complete C source for the parser and return it.
///
/// The emission order matters: later sections of the generated file refer to
/// identifiers (symbol enum values, helper tables) defined by earlier ones.
fn generate(mut self) -> String {
    self.init();
    self.add_includes();
    self.add_pragmas();
    self.add_stats();
    self.add_symbol_enum();
    self.add_symbol_names_list();
    self.add_unique_symbol_map();
    self.add_symbol_metadata_list();

    if !self.field_names.is_empty() {
        self.add_field_name_enum();
        self.add_field_name_names_list();
        self.add_field_sequences();
    }

    if !self.parse_table.production_infos.is_empty() {
        self.add_alias_sequences();
    }

    self.add_non_terminal_alias_map();

    // Primary state ids only exist in newer ABI versions.
    if self.abi_version >= ABI_VERSION_WITH_PRIMARY_STATES {
        self.add_primary_state_id_list();
    }

    // Move the lex table out of `self` so that `add_lex_function` can take it
    // by value while still borrowing `self` mutably.
    let mut main_lex_table = LexTable::default();
    swap(&mut main_lex_table, &mut self.main_lex_table);
    self.add_lex_function("ts_lex", main_lex_table, true);

    if self.keyword_capture_token.is_some() {
        let mut keyword_lex_table = LexTable::default();
        swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
        self.add_lex_function("ts_lex_keywords", keyword_lex_table, false);
    }

    self.add_lex_modes_list();

    if !self.syntax_grammar.external_tokens.is_empty() {
        self.add_external_token_enum();
        self.add_external_scanner_symbol_map();
        self.add_external_scanner_states_list();
    }

    self.add_parse_table();
    self.add_parser_export();

    self.buffer
}
|
|
|
|
|
|
2019-02-07 12:29:20 -08:00
|
|
|
/// Populate the generator's derived lookup tables before any code is emitted:
/// `symbol_ids`, `symbol_map`, `field_names`, `alias_ids`, `unique_aliases`,
/// and `large_state_count`.
fn init(&mut self) {
    // Assign a unique C identifier to every symbol in the parse table.
    let mut symbol_identifiers = HashSet::new();
    for i in 0..self.parse_table.symbols.len() {
        self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
    }
    // The "end of non-terminal extra" sentinel reuses the EOF symbol's id.
    self.symbol_ids.insert(
        Symbol::end_of_nonterminal_extra(),
        self.symbol_ids[&Symbol::end()].clone(),
    );

    self.symbol_map = self
        .parse_table
        .symbols
        .iter()
        .map(|symbol| {
            let mut mapping = symbol;

            // There can be multiple symbols in the grammar that have the same name and kind,
            // due to simple aliases. When that happens, ensure that they map to the same
            // public-facing symbol. If one of the symbols is not aliased, choose that one
            // to be the public-facing symbol. Otherwise, pick the symbol with the lowest
            // numeric value.
            if let Some(alias) = self.default_aliases.get(symbol) {
                let kind = alias.kind();
                for other_symbol in &self.parse_table.symbols {
                    if let Some(other_alias) = self.default_aliases.get(other_symbol) {
                        if other_symbol < mapping && other_alias == alias {
                            mapping = other_symbol;
                        }
                    } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
                        mapping = other_symbol;
                        break;
                    }
                }
            }
            // Two anonymous tokens with different flags but the same string value
            // should be represented with the same symbol in the public API. Examples:
            // * "<" and token(prec(1, "<"))
            // * "(" and token.immediate("(")
            else if symbol.is_terminal() {
                let metadata = self.metadata_for_symbol(*symbol);
                for other_symbol in &self.parse_table.symbols {
                    let other_metadata = self.metadata_for_symbol(*other_symbol);
                    if other_metadata == metadata {
                        mapping = other_symbol;
                        break;
                    }
                }
            }

            (*symbol, *mapping)
        })
        .collect();

    for production_info in &self.parse_table.production_infos {
        // Build a list of all field names, kept sorted via insertion at the
        // binary-search position.
        for field_name in production_info.field_map.keys() {
            if let Err(i) = self.field_names.binary_search(&field_name) {
                self.field_names.insert(i, field_name.clone());
            }
        }

        for alias in &production_info.alias_sequence {
            // Generate a mapping from aliases to C identifiers.
            if let Some(alias) = &alias {
                let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
                    if let Some(default_alias) = self.default_aliases.get(symbol) {
                        default_alias == alias
                    } else {
                        let (name, kind) = self.metadata_for_symbol(*symbol);
                        name == alias.value && kind == alias.kind()
                    }
                });

                // Some aliases match an existing symbol in the grammar.
                let alias_id;
                if let Some(existing_symbol) = existing_symbol {
                    alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
                }
                // Other aliases don't match any existing symbol, and need their own identifiers.
                else {
                    if let Err(i) = self.unique_aliases.binary_search(alias) {
                        self.unique_aliases.insert(i, alias.clone());
                    }

                    alias_id = if alias.is_named {
                        format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
                    } else {
                        format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
                    };
                }

                self.alias_ids.entry(alias.clone()).or_insert(alias_id);
            }
        }
    }

    // Determine which states should use the "small state" representation, and which should
    // use the normal array representation.
    let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
    self.large_state_count = self
        .parse_table
        .states
        .iter()
        .enumerate()
        .take_while(|(i, s)| {
            // The first two states are always "large" regardless of size.
            *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
        })
        .count();
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
/// Emit the `#include` directives required by every generated parser,
/// followed by a blank separator line.
fn add_includes(&mut self) {
    for text in &["#include <tree_sitter/parser.h>", ""] {
        add_line!(self, "{}", text);
    }
}
|
|
|
|
|
|
|
|
|
|
/// Emit compiler pragmas at the top of the generated file: suppress a noisy
/// warning for designated initializers, and optionally disable optimization
/// of very large lexers.
fn add_pragmas(&mut self) {
    add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
    add_line!(self, "#pragma GCC diagnostic push");
    add_line!(
        self,
        "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
    );
    add_line!(self, "#endif");
    add_line!(self, "");

    // Compiling large lexer functions can be very slow. Disabling optimizations
    // is not ideal, but only a very small fraction of overall parse time is
    // spent lexing, so the performance impact of this is negligible.
    if self.main_lex_table.states.len() > 300 {
        add_line!(self, "#ifdef _MSC_VER");
        add_line!(self, "#pragma optimize(\"\", off)");
        add_line!(self, "#elif defined(__clang__)");
        add_line!(self, "#pragma clang optimize off");
        add_line!(self, "#elif defined(__GNUC__)");
        add_line!(self, "#pragma GCC optimize (\"O0\")");
        add_line!(self, "#endif");
        add_line!(self, "");
    }
}
|
|
|
|
|
|
|
|
|
|
/// Emit the `#define` constants describing the sizes of the parser's tables.
fn add_stats(&mut self) {
    // Count tokens: terminals, EOF, and external tokens that have no
    // corresponding internal token (those would be double-counted otherwise).
    let token_count = self
        .parse_table
        .symbols
        .iter()
        .filter(|symbol| {
            if symbol.is_terminal() || symbol.is_eof() {
                true
            } else if symbol.is_external() {
                self.syntax_grammar.external_tokens[symbol.index]
                    .corresponding_internal_token
                    .is_none()
            } else {
                false
            }
        })
        .count();

    add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version);
    add_line!(
        self,
        "#define STATE_COUNT {}",
        self.parse_table.states.len()
    );
    add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);

    add_line!(
        self,
        "#define SYMBOL_COUNT {}",
        self.parse_table.symbols.len()
    );
    add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),);
    add_line!(self, "#define TOKEN_COUNT {}", token_count);
    add_line!(
        self,
        "#define EXTERNAL_TOKEN_COUNT {}",
        self.syntax_grammar.external_tokens.len()
    );
    add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
    add_line!(
        self,
        "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
        self.parse_table.max_aliased_production_length
    );
    add_line!(
        self,
        "#define PRODUCTION_ID_COUNT {}",
        self.parse_table.production_infos.len()
    );
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
/// Emit the C enum of symbol values, also recording each symbol's position
/// in `self.symbol_order`. The EOF symbol is implicitly 0; unique aliases
/// are appended after all symbols.
fn add_symbol_enum(&mut self) {
    add_line!(self, "enum {{");
    indent!(self);
    self.symbol_order.insert(Symbol::end(), 0);
    let mut i = 1;
    for symbol in self.parse_table.symbols.iter() {
        if *symbol != Symbol::end() {
            self.symbol_order.insert(*symbol, i);
            add_line!(self, "{} = {},", self.symbol_ids[&symbol], i);
            i += 1;
        }
    }
    for alias in &self.unique_aliases {
        add_line!(self, "{} = {},", self.alias_ids[&alias], i);
        i += 1;
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
/// Emit `ts_symbol_names`, the table of human-readable names for every
/// symbol and unique alias. A symbol's default alias, if any, takes
/// precedence over its grammar name.
fn add_symbol_names_list(&mut self) {
    add_line!(self, "static const char * const ts_symbol_names[] = {{");
    indent!(self);
    for symbol in self.parse_table.symbols.iter() {
        let name = self.sanitize_string(
            self.default_aliases
                .get(symbol)
                .map(|alias| alias.value.as_str())
                .unwrap_or(self.metadata_for_symbol(*symbol).0),
        );
        add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name);
    }
    for alias in &self.unique_aliases {
        add_line!(
            self,
            "[{}] = \"{}\",",
            self.alias_ids[&alias],
            self.sanitize_string(&alias.value)
        );
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
/// Emit `ts_symbol_map`, which maps each symbol to its public-facing symbol
/// (computed in `init`). Unique aliases always map to themselves.
fn add_unique_symbol_map(&mut self) {
    add_line!(self, "static const TSSymbol ts_symbol_map[] = {{");
    indent!(self);
    for symbol in &self.parse_table.symbols {
        add_line!(
            self,
            "[{}] = {},",
            self.symbol_ids[symbol],
            self.symbol_ids[&self.symbol_map[symbol]],
        );
    }

    for alias in &self.unique_aliases {
        add_line!(
            self,
            "[{}] = {},",
            self.alias_ids[&alias],
            self.alias_ids[&alias],
        );
    }

    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-02-07 12:29:20 -08:00
|
|
|
fn add_field_name_enum(&mut self) {
|
|
|
|
|
add_line!(self, "enum {{");
|
|
|
|
|
indent!(self);
|
|
|
|
|
for (i, field_name) in self.field_names.iter().enumerate() {
|
|
|
|
|
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn add_field_name_names_list(&mut self) {
|
2021-05-19 12:49:57 +02:00
|
|
|
add_line!(self, "static const char * const ts_field_names[] = {{");
|
2019-02-07 12:29:20 -08:00
|
|
|
indent!(self);
|
|
|
|
|
add_line!(self, "[0] = NULL,");
|
|
|
|
|
for field_name in &self.field_names {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"[{}] = \"{}\",",
|
|
|
|
|
self.field_id(field_name),
|
|
|
|
|
field_name
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
/// Emit `ts_symbol_metadata`, recording each symbol's visibility and
/// named-ness (and supertype flag for hidden named symbols). A default
/// alias overrides the symbol's own metadata.
fn add_symbol_metadata_list(&mut self) {
    add_line!(
        self,
        "static const TSSymbolMetadata ts_symbol_metadata[] = {{"
    );
    indent!(self);
    for symbol in &self.parse_table.symbols {
        add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
        indent!(self);
        if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
            // Aliased symbols are always visible; named-ness comes from the alias.
            add_line!(self, ".visible = true,");
            add_line!(self, ".named = {},", is_named);
        } else {
            match self.metadata_for_symbol(*symbol).1 {
                VariableType::Named => {
                    add_line!(self, ".visible = true,");
                    add_line!(self, ".named = true,");
                }
                VariableType::Anonymous => {
                    add_line!(self, ".visible = true,");
                    add_line!(self, ".named = false,");
                }
                VariableType::Hidden => {
                    add_line!(self, ".visible = false,");
                    add_line!(self, ".named = true,");
                    if self.syntax_grammar.supertype_symbols.contains(symbol) {
                        add_line!(self, ".supertype = true,");
                    }
                }
                VariableType::Auxiliary => {
                    add_line!(self, ".visible = false,");
                    add_line!(self, ".named = false,");
                }
            }
        }
        dedent!(self);
        add_line!(self, "}},");
    }
    // Unique aliases are always visible.
    for alias in &self.unique_aliases {
        add_line!(self, "[{}] = {{", self.alias_ids[&alias]);
        indent!(self);
        add_line!(self, ".visible = true,");
        add_line!(self, ".named = {},", alias.is_named);
        dedent!(self);
        add_line!(self, "}},");
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
/// Emit `ts_alias_sequences`, a per-production table of alias symbols
/// applied at each child position. Productions with no aliases are omitted
/// (their rows zero-initialize implicitly).
fn add_alias_sequences(&mut self) {
    add_line!(
        self,
        "static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
    );
    indent!(self);
    for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
        if production_info.alias_sequence.is_empty() {
            // Work around MSVC's intolerance of empty array initializers by
            // explicitly zero-initializing the first element.
            if i == 0 {
                add_line!(self, "[0] = {{0}},");
            }
            continue;
        }

        add_line!(self, "[{}] = {{", i);
        indent!(self);
        for (j, alias) in production_info.alias_sequence.iter().enumerate() {
            if let Some(alias) = alias {
                add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
            }
        }
        dedent!(self);
        add_line!(self, "}},");
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
2019-02-07 12:29:20 -08:00
|
|
|
|
2020-08-21 14:12:04 -07:00
|
|
|
fn add_non_terminal_alias_map(&mut self) {
|
2020-10-28 12:34:11 -07:00
|
|
|
let mut alias_ids_by_symbol = HashMap::new();
|
2020-08-21 14:12:04 -07:00
|
|
|
for variable in &self.syntax_grammar.variables {
|
|
|
|
|
for production in &variable.productions {
|
|
|
|
|
for step in &production.steps {
|
|
|
|
|
if let Some(alias) = &step.alias {
|
|
|
|
|
if step.symbol.is_non_terminal()
|
2020-10-27 15:46:09 -07:00
|
|
|
&& Some(alias) != self.default_aliases.get(&step.symbol)
|
2020-08-21 14:12:04 -07:00
|
|
|
{
|
|
|
|
|
if self.symbol_ids.contains_key(&step.symbol) {
|
2020-10-28 12:34:11 -07:00
|
|
|
if let Some(alias_id) = self.alias_ids.get(&alias) {
|
|
|
|
|
let alias_ids = alias_ids_by_symbol
|
|
|
|
|
.entry(step.symbol)
|
|
|
|
|
.or_insert(Vec::new());
|
|
|
|
|
if let Err(i) = alias_ids.binary_search(&alias_id) {
|
|
|
|
|
alias_ids.insert(i, alias_id);
|
|
|
|
|
}
|
2020-08-21 14:12:04 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-28 12:34:11 -07:00
|
|
|
let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
|
|
|
|
|
alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
|
2020-08-21 14:12:04 -07:00
|
|
|
|
2021-05-16 17:55:58 +03:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static const uint16_t ts_non_terminal_alias_map[] = {{"
|
|
|
|
|
);
|
2020-08-21 14:12:04 -07:00
|
|
|
indent!(self);
|
2020-10-28 12:34:11 -07:00
|
|
|
for (symbol, alias_ids) in alias_ids_by_symbol {
|
2020-08-21 14:12:04 -07:00
|
|
|
let symbol_id = &self.symbol_ids[symbol];
|
|
|
|
|
let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]];
|
2020-10-28 12:34:11 -07:00
|
|
|
add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len());
|
2020-08-21 14:12:04 -07:00
|
|
|
indent!(self);
|
|
|
|
|
add_line!(self, "{},", public_symbol_id);
|
2020-10-28 12:34:11 -07:00
|
|
|
for alias_id in alias_ids {
|
|
|
|
|
add_line!(self, "{},", alias_id);
|
2020-08-21 14:12:04 -07:00
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
}
|
|
|
|
|
add_line!(self, "0,");
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-11 21:57:06 -08:00
|
|
|
/// Produces a list of the "primary state" for every state in the grammar.
|
|
|
|
|
///
|
|
|
|
|
/// The "primary state" for a given state is the first encountered state that behaves
|
|
|
|
|
/// identically with respect to query analysis. We derive this by keeping track of the `core_id`
|
|
|
|
|
/// for each state and treating the first state with a given `core_id` as primary.
|
|
|
|
|
fn add_primary_state_id_list(&mut self) {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{"
|
|
|
|
|
);
|
|
|
|
|
indent!(self);
|
|
|
|
|
let mut first_state_for_each_core_id = HashMap::new();
|
|
|
|
|
for (idx, state) in self.parse_table.states.iter().enumerate() {
|
|
|
|
|
let primary_state = first_state_for_each_core_id
|
|
|
|
|
.entry(state.core_id)
|
|
|
|
|
.or_insert(idx);
|
|
|
|
|
add_line!(self, "[{}] = {},", idx, primary_state);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
/// Emit the two field tables: `ts_field_map_slices` (per-production index
/// and length into the entries table) and `ts_field_map_entries` (flattened
/// (field-id, child-index[, inherited]) tuples), de-duplicating identical
/// field maps across productions.
fn add_field_sequences(&mut self) {
    let mut flat_field_maps = vec![];
    let mut next_flat_field_map_index = 0;
    // Reserve row 0 for the empty field map so productions without fields
    // can use slice index 0.
    self.get_field_map_id(
        &Vec::new(),
        &mut flat_field_maps,
        &mut next_flat_field_map_index,
    );

    // For each production, flatten its field map and record the de-duplicated
    // row id together with the number of entries.
    let mut field_map_ids = Vec::new();
    for production_info in &self.parse_table.production_infos {
        if !production_info.field_map.is_empty() {
            let mut flat_field_map = Vec::new();
            for (field_name, locations) in &production_info.field_map {
                for location in locations {
                    flat_field_map.push((field_name.clone(), *location));
                }
            }
            field_map_ids.push((
                self.get_field_map_id(
                    &flat_field_map,
                    &mut flat_field_maps,
                    &mut next_flat_field_map_index,
                ),
                flat_field_map.len(),
            ));
        } else {
            field_map_ids.push((0, 0));
        }
    }

    add_line!(
        self,
        "static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
    );
    indent!(self);
    for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
        if length > 0 {
            add_line!(
                self,
                "[{}] = {{.index = {}, .length = {}}},",
                production_id,
                row_id,
                length
            );
        }
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");

    add_line!(
        self,
        "static const TSFieldMapEntry ts_field_map_entries[] = {{",
    );
    indent!(self);
    // Skip row 0: it is the empty field map reserved above.
    for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
        add_line!(self, "[{}] =", row_index);
        indent!(self);
        for (field_name, location) in field_pairs {
            add_whitespace!(self);
            add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
            if location.inherited {
                add!(self, ", .inherited = true");
            }
            add!(self, "}},\n");
        }
        dedent!(self);
    }

    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
fn add_lex_function(
|
|
|
|
|
&mut self,
|
|
|
|
|
name: &str,
|
|
|
|
|
lex_table: LexTable,
|
|
|
|
|
extract_helper_functions: bool,
|
|
|
|
|
) {
|
|
|
|
|
let mut ruled_out_chars = HashSet::new();
|
2020-05-26 16:37:45 -07:00
|
|
|
let mut large_character_sets = Vec::<LargeCharacterSetInfo>::new();
|
2020-05-26 13:39:11 -07:00
|
|
|
|
|
|
|
|
// For each lex state, compute a summary of the code that needs to be
|
|
|
|
|
// generated.
|
|
|
|
|
let state_transition_summaries: Vec<Vec<TransitionSummary>> = lex_table
|
|
|
|
|
.states
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|state| {
|
|
|
|
|
ruled_out_chars.clear();
|
|
|
|
|
|
|
|
|
|
// For each state transition, compute the set of character ranges
|
|
|
|
|
// that need to be checked.
|
|
|
|
|
state
|
|
|
|
|
.advance_actions
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|(chars, action)| {
|
2021-01-28 14:58:29 -08:00
|
|
|
let is_included = !chars.contains(std::char::MAX);
|
|
|
|
|
let mut ranges;
|
2020-05-26 13:39:11 -07:00
|
|
|
if is_included {
|
2021-01-28 14:58:29 -08:00
|
|
|
ranges = chars.simplify_ignoring(&ruled_out_chars);
|
|
|
|
|
ruled_out_chars.extend(chars.iter());
|
2020-05-26 13:39:11 -07:00
|
|
|
} else {
|
2021-01-28 14:58:29 -08:00
|
|
|
ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars);
|
2020-05-26 13:39:11 -07:00
|
|
|
ranges.insert(0, '\0'..'\0')
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Record any large character sets so that they can be extracted
|
|
|
|
|
// into helper functions, reducing code duplication.
|
2021-01-28 14:58:29 -08:00
|
|
|
let mut call_id = None;
|
2020-05-26 13:39:11 -07:00
|
|
|
if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT {
|
|
|
|
|
let char_set_symbol = self
|
|
|
|
|
.symbol_for_advance_action(action, &lex_table)
|
|
|
|
|
.expect("No symbol for lex state");
|
|
|
|
|
let mut count_for_symbol = 0;
|
2020-05-26 16:37:45 -07:00
|
|
|
for (i, info) in large_character_sets.iter_mut().enumerate() {
|
|
|
|
|
if info.ranges == ranges {
|
2020-05-26 13:39:11 -07:00
|
|
|
call_id = Some(i);
|
|
|
|
|
break;
|
|
|
|
|
}
|
2020-05-26 16:37:45 -07:00
|
|
|
if info.symbol == char_set_symbol {
|
2020-05-26 13:39:11 -07:00
|
|
|
count_for_symbol += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if call_id.is_none() {
|
|
|
|
|
call_id = Some(large_character_sets.len());
|
2020-05-26 16:37:45 -07:00
|
|
|
large_character_sets.push(LargeCharacterSetInfo {
|
|
|
|
|
symbol: char_set_symbol,
|
|
|
|
|
index: count_for_symbol + 1,
|
|
|
|
|
ranges: ranges.clone(),
|
|
|
|
|
});
|
2020-05-26 13:39:11 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TransitionSummary {
|
|
|
|
|
is_included,
|
|
|
|
|
ranges,
|
|
|
|
|
call_id,
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect()
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
// Generate a helper function for each large character set.
|
|
|
|
|
let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect();
|
2020-05-26 16:37:45 -07:00
|
|
|
sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index));
|
|
|
|
|
for info in sorted_large_char_sets {
|
2021-03-09 15:01:26 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static inline bool {}_character_set_{}(int32_t c) {{",
|
|
|
|
|
self.symbol_ids[&info.symbol],
|
|
|
|
|
info.index
|
|
|
|
|
);
|
|
|
|
|
indent!(self);
|
|
|
|
|
add_whitespace!(self);
|
|
|
|
|
add!(self, "return ");
|
|
|
|
|
let tree = CharacterTree::from_ranges(&info.ranges);
|
|
|
|
|
self.add_character_tree(tree.as_ref());
|
|
|
|
|
add!(self, ";\n");
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}}");
|
|
|
|
|
add_line!(self, "");
|
2020-05-26 13:39:11 -07:00
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static bool {}(TSLexer *lexer, TSStateId state) {{",
|
|
|
|
|
name
|
|
|
|
|
);
|
|
|
|
|
indent!(self);
|
2019-10-31 14:32:10 -07:00
|
|
|
|
2020-06-08 16:07:22 -07:00
|
|
|
add_line!(self, "START_LEXER();");
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(self, "switch (state) {{");
|
|
|
|
|
|
2020-06-08 16:07:22 -07:00
|
|
|
indent!(self);
|
2018-12-23 10:16:03 -08:00
|
|
|
for (i, state) in lex_table.states.into_iter().enumerate() {
|
|
|
|
|
add_line!(self, "case {}:", i);
|
|
|
|
|
indent!(self);
|
2020-05-26 13:39:11 -07:00
|
|
|
self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets);
|
2018-12-23 10:16:03 -08:00
|
|
|
dedent!(self);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
add_line!(self, "default:");
|
|
|
|
|
indent!(self);
|
|
|
|
|
add_line!(self, "return false;");
|
|
|
|
|
dedent!(self);
|
|
|
|
|
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}}");
|
2020-06-08 16:07:22 -07:00
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}}");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
fn symbol_for_advance_action(
|
|
|
|
|
&self,
|
|
|
|
|
action: &AdvanceAction,
|
|
|
|
|
lex_table: &LexTable,
|
|
|
|
|
) -> Option<Symbol> {
|
|
|
|
|
let mut state_ids = vec![action.state];
|
|
|
|
|
let mut i = 0;
|
|
|
|
|
while i < state_ids.len() {
|
|
|
|
|
let id = state_ids[i];
|
|
|
|
|
let state = &lex_table.states[id];
|
|
|
|
|
if let Some(accept) = state.accept_action {
|
|
|
|
|
return Some(accept);
|
|
|
|
|
}
|
|
|
|
|
for (_, action) in &state.advance_actions {
|
|
|
|
|
if !state_ids.contains(&action.state) {
|
|
|
|
|
state_ids.push(action.state);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn add_lex_state(
|
|
|
|
|
&mut self,
|
|
|
|
|
state: LexState,
|
|
|
|
|
transition_info: &Vec<TransitionSummary>,
|
2020-05-26 16:37:45 -07:00
|
|
|
large_character_sets: &Vec<LargeCharacterSetInfo>,
|
2020-05-26 13:39:11 -07:00
|
|
|
) {
|
2018-12-23 10:16:03 -08:00
|
|
|
if let Some(accept_action) = state.accept_action {
|
2019-01-03 14:08:24 -08:00
|
|
|
add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
|
|
|
|
|
2019-10-30 17:11:15 -07:00
|
|
|
if let Some(eof_action) = state.eof_action {
|
|
|
|
|
add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
for (i, (_, action)) in state.advance_actions.into_iter().enumerate() {
|
|
|
|
|
let transition = &transition_info[i];
|
2019-01-02 12:34:40 -08:00
|
|
|
add_whitespace!(self);
|
2020-05-26 13:39:11 -07:00
|
|
|
|
|
|
|
|
// If there is a helper function for this transition's character
|
|
|
|
|
// set, then generate a call to that helper function.
|
|
|
|
|
if let Some(call_id) = transition.call_id {
|
2020-05-26 16:37:45 -07:00
|
|
|
let info = &large_character_sets[call_id];
|
2021-03-09 15:01:26 -08:00
|
|
|
add!(self, "if (");
|
|
|
|
|
if !transition.is_included {
|
|
|
|
|
add!(self, "!");
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
2021-03-09 15:01:26 -08:00
|
|
|
add!(
|
|
|
|
|
self,
|
|
|
|
|
"{}_character_set_{}(lookahead)) ",
|
|
|
|
|
self.symbol_ids[&info.symbol],
|
|
|
|
|
info.index
|
|
|
|
|
);
|
|
|
|
|
self.add_advance_action(&action);
|
|
|
|
|
add!(self, "\n");
|
|
|
|
|
continue;
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
2020-05-26 16:37:45 -07:00
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
// Otherwise, generate code to compare the lookahead character
|
|
|
|
|
// with all of the character ranges.
|
2020-05-26 16:37:45 -07:00
|
|
|
if transition.ranges.len() > 0 {
|
2020-05-26 13:39:11 -07:00
|
|
|
add!(self, "if (");
|
|
|
|
|
self.add_character_range_conditions(&transition.ranges, transition.is_included, 2);
|
|
|
|
|
add!(self, ") ");
|
|
|
|
|
}
|
|
|
|
|
self.add_advance_action(&action);
|
2019-06-19 21:08:59 -07:00
|
|
|
add!(self, "\n");
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
add_line!(self, "END_STATE();");
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-26 13:39:11 -07:00
|
|
|
    /// Emit a C boolean expression comparing `lookahead` against a list of
    /// character ranges.
    ///
    /// When `is_included` is true the ranges are joined with `||` (match any
    /// range); otherwise each range is negated and joined with `&&` (match no
    /// range). `indent_count` is the extra indentation applied when the
    /// expression wraps onto continuation lines.
    fn add_character_range_conditions(
        &mut self,
        ranges: &[Range<char>],
        is_included: bool,
        indent_count: usize,
    ) {
        // Precompute the line-break-plus-indentation string used between
        // clauses of a multi-line condition.
        let mut line_break = "\n".to_string();
        for _ in 0..self.indent_level + indent_count {
            line_break.push_str(" ");
        }

        for (i, range) in ranges.iter().enumerate() {
            if is_included {
                if i > 0 {
                    add!(self, " ||{}", line_break);
                }
                // A single-character range: plain equality test.
                if range.end == range.start {
                    // The null character also encodes end-of-file in the
                    // generated lexer, so guard with `!eof`.
                    if range.start == '\0' {
                        add!(self, "!eof && ");
                    }
                    add!(self, "lookahead == ");
                    self.add_character(range.start);
                // A two-character range: two equality tests rather than a
                // bounds check.
                } else if range.end as u32 == range.start as u32 + 1 {
                    if range.start == '\0' {
                        add!(self, "!eof && ");
                    }
                    add!(self, "lookahead == ");
                    self.add_character(range.start);
                    add!(self, " ||{}lookahead == ", line_break);
                    self.add_character(range.end);
                // A general range: inclusive lower/upper bound comparison.
                } else {
                    if range.start == '\0' {
                        add!(self, "!eof && ");
                    }
                    add!(self, "(");
                    self.add_character(range.start);
                    add!(self, " <= lookahead && lookahead <= ");
                    self.add_character(range.end);
                    add!(self, ")");
                }
            } else {
                if i > 0 {
                    add!(self, " &&{}", line_break);
                }
                // Negated single-character range.
                if range.end == range.start {
                    add!(self, "lookahead != ");
                    self.add_character(range.start);
                // Negated two-character range.
                } else if range.end as u32 == range.start as u32 + 1 {
                    add!(self, "lookahead != ");
                    self.add_character(range.start);
                    add!(self, " &&{}lookahead != ", line_break);
                    self.add_character(range.end);
                // Negated general range: outside-the-bounds comparison. When
                // the range starts at '\0' only the upper bound is needed,
                // since no character is below '\0'.
                } else {
                    if range.start != '\0' {
                        add!(self, "(lookahead < ");
                        self.add_character(range.start);
                        add!(self, " || ");
                        self.add_character(range.end);
                        add!(self, " < lookahead)");
                    } else {
                        add!(self, "lookahead > ");
                        self.add_character(range.end);
                    }
                }
            }
        }
    }
|
|
|
|
|
|
|
|
|
|
    /// Recursively emit a C boolean expression for a large-character-set
    /// helper function, testing the character variable `c` against a decision
    /// tree of comparisons.
    ///
    /// `None` renders as `false`; `CharacterTree::Yes` renders as `true`.
    fn add_character_tree(&mut self, tree: Option<&CharacterTree>) {
        match tree {
            Some(CharacterTree::Compare {
                value,
                operator,
                consequence,
                alternative,
            }) => {
                let op = match operator {
                    Comparator::Less => "<",
                    Comparator::LessOrEqual => "<=",
                    Comparator::Equal => "==",
                    Comparator::GreaterOrEqual => ">=",
                };
                let consequence = consequence.as_ref().map(Box::as_ref);
                let alternative = alternative.as_ref().map(Box::as_ref);

                // A "simple" node is a bare comparison whose success means
                // "yes" and which has no alternative; it needs no parentheses.
                let simple = alternative.is_none() && consequence == Some(&CharacterTree::Yes);

                if !simple {
                    add!(self, "(");
                }

                add!(self, "c {} ", op);
                self.add_character(*value);

                if !simple {
                    if alternative.is_none() {
                        // Only a consequence: success must also satisfy it.
                        add!(self, " && ");
                        self.add_character_tree(consequence);
                    } else if consequence == Some(&CharacterTree::Yes) {
                        // Success is final; failure falls through to the
                        // alternative.
                        add!(self, " || ");
                        self.add_character_tree(alternative);
                    } else {
                        // Full branch: emit a multi-line C ternary, with the
                        // `?` and `:` arms on their own indented lines.
                        add!(self, "\n");
                        indent!(self);
                        add_whitespace!(self);
                        add!(self, "? ");
                        self.add_character_tree(consequence);
                        add!(self, "\n");
                        add_whitespace!(self);
                        add!(self, ": ");
                        self.add_character_tree(alternative);
                        dedent!(self);
                    }
                }

                if !simple {
                    add!(self, ")");
                }
            }
            Some(CharacterTree::Yes) => {
                add!(self, "true");
            }
            None => {
                add!(self, "false");
            }
        }
    }
|
|
|
|
|
|
2019-06-19 21:36:01 -07:00
|
|
|
fn add_advance_action(&mut self, action: &AdvanceAction) {
|
2019-01-02 16:48:44 -08:00
|
|
|
if action.in_main_token {
|
2019-06-19 21:36:01 -07:00
|
|
|
add!(self, "ADVANCE({});", action.state);
|
2019-01-02 16:48:44 -08:00
|
|
|
} else {
|
2019-06-19 21:36:01 -07:00
|
|
|
add!(self, "SKIP({})", action.state);
|
2019-01-02 16:48:44 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
fn add_lex_modes_list(&mut self) {
|
2021-05-16 17:55:58 +03:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static const TSLexMode ts_lex_modes[STATE_COUNT] = {{"
|
|
|
|
|
);
|
2018-12-23 10:16:03 -08:00
|
|
|
indent!(self);
|
2019-05-16 16:27:05 -07:00
|
|
|
for (i, state) in self.parse_table.states.iter().enumerate() {
|
2021-02-18 15:43:01 -08:00
|
|
|
if state.is_end_of_non_terminal_extra() {
|
2020-05-12 15:42:11 -07:00
|
|
|
add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,);
|
2019-10-21 13:31:49 -07:00
|
|
|
} else if state.external_lex_state_id > 0 {
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"[{}] = {{.lex_state = {}, .external_lex_state = {}}},",
|
|
|
|
|
i,
|
|
|
|
|
state.lex_state_id,
|
2019-05-16 16:27:05 -07:00
|
|
|
state.external_lex_state_id
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
} else {
|
|
|
|
|
add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn add_external_token_enum(&mut self) {
|
|
|
|
|
add_line!(self, "enum {{");
|
|
|
|
|
indent!(self);
|
|
|
|
|
for i in 0..self.syntax_grammar.external_tokens.len() {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"{} = {},",
|
|
|
|
|
self.external_token_id(&self.syntax_grammar.external_tokens[i]),
|
|
|
|
|
i
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn add_external_scanner_symbol_map(&mut self) {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
2021-04-26 21:03:35 +02:00
|
|
|
"static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
indent!(self);
|
|
|
|
|
for i in 0..self.syntax_grammar.external_tokens.len() {
|
2019-01-04 15:27:15 -08:00
|
|
|
let token = &self.syntax_grammar.external_tokens[i];
|
2019-01-17 17:16:04 -08:00
|
|
|
let id_token = token
|
|
|
|
|
.corresponding_internal_token
|
|
|
|
|
.unwrap_or(Symbol::external(i));
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"[{}] = {},",
|
2019-01-04 15:27:15 -08:00
|
|
|
self.external_token_id(&token),
|
|
|
|
|
self.symbol_ids[&id_token],
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn add_external_scanner_states_list(&mut self) {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
2021-04-26 21:03:35 +02:00
|
|
|
"static const bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
|
2019-05-16 16:27:05 -07:00
|
|
|
self.parse_table.external_lex_states.len(),
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
indent!(self);
|
2019-05-16 16:27:05 -07:00
|
|
|
for i in 0..self.parse_table.external_lex_states.len() {
|
|
|
|
|
if !self.parse_table.external_lex_states[i].is_empty() {
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(self, "[{}] = {{", i);
|
|
|
|
|
indent!(self);
|
2019-05-16 16:27:05 -07:00
|
|
|
for token in self.parse_table.external_lex_states[i].iter() {
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"[{}] = true,",
|
2019-08-29 15:26:05 -07:00
|
|
|
self.external_token_id(&self.syntax_grammar.external_tokens[token.index])
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}},");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
    /// Emit the parse tables: the dense `ts_parse_table` for "large" states,
    /// the packed `ts_small_parse_table` plus its offset map for the
    /// remaining states, and finally the shared `ts_parse_actions` list.
    ///
    /// Parse-action lists are deduplicated: `parse_table_entries` maps each
    /// distinct entry to its index within the flat action array, and
    /// `next_parse_action_list_index` tracks the next free slot.
    fn add_parse_table(&mut self) {
        let mut parse_table_entries = HashMap::new();
        let mut next_parse_action_list_index = 0;

        // Reserve index 0 for the empty entry so that 0 can mean "no action".
        self.get_parse_action_list_id(
            &ParseTableEntry {
                actions: Vec::new(),
                reusable: false,
            },
            &mut parse_table_entries,
            &mut next_parse_action_list_index,
        );

        add_line!(
            self,
            "static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{",
        );
        indent!(self);

        // Scratch buffers reused across states to avoid per-state allocation.
        let mut terminal_entries = Vec::new();
        let mut nonterminal_entries = Vec::new();

        for (i, state) in self
            .parse_table
            .states
            .iter()
            .enumerate()
            .take(self.large_state_count)
        {
            add_line!(self, "[{}] = {{", i);
            indent!(self);

            // Ensure the entries are in a deterministic order, since they are
            // internally represented as a hash map.
            terminal_entries.clear();
            nonterminal_entries.clear();
            terminal_entries.extend(state.terminal_entries.iter());
            nonterminal_entries.extend(state.nonterminal_entries.iter());
            terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
            nonterminal_entries.sort_unstable_by_key(|k| k.0);

            // Goto entries: map each non-terminal to its destination state.
            // `ShiftExtra` loops back to the current state.
            for (symbol, action) in &nonterminal_entries {
                add_line!(
                    self,
                    "[{}] = STATE({}),",
                    self.symbol_ids[symbol],
                    match action {
                        GotoAction::Goto(state) => *state,
                        GotoAction::ShiftExtra => i,
                    }
                );
            }

            // Terminal entries: reference the deduplicated action lists.
            for (symbol, entry) in &terminal_entries {
                let entry_id = self.get_parse_action_list_id(
                    entry,
                    &mut parse_table_entries,
                    &mut next_parse_action_list_index,
                );
                add_line!(
                    self,
                    "[{}] = ACTIONS({}),",
                    self.symbol_ids[symbol],
                    entry_id
                );
            }
            dedent!(self);
            add_line!(self, "}},");
        }
        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");

        // Small states use a compact run-length-style encoding instead of a
        // dense row per symbol.
        if self.large_state_count < self.parse_table.states.len() {
            add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
            indent!(self);

            let mut index = 0;
            let mut small_state_indices = Vec::new();
            let mut symbols_by_value: HashMap<(usize, SymbolType), Vec<Symbol>> = HashMap::new();
            for state in self.parse_table.states.iter().skip(self.large_state_count) {
                small_state_indices.push(index);
                symbols_by_value.clear();

                terminal_entries.clear();
                terminal_entries.extend(state.terminal_entries.iter());
                terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));

                // In a given parse state, many lookahead symbols have the same actions.
                // So in the "small state" representation, group symbols by their action
                // in order to avoid repeating the action.
                for (symbol, entry) in &terminal_entries {
                    let entry_id = self.get_parse_action_list_id(
                        entry,
                        &mut parse_table_entries,
                        &mut next_parse_action_list_index,
                    );
                    symbols_by_value
                        .entry((entry_id, SymbolType::Terminal))
                        .or_default()
                        .push(**symbol);
                }
                for (symbol, action) in &state.nonterminal_entries {
                    let state_id = match action {
                        GotoAction::Goto(i) => *i,
                        GotoAction::ShiftExtra => {
                            // The current state's id: large states come first,
                            // then small states in push order.
                            self.large_state_count + small_state_indices.len() - 1
                        }
                    };
                    symbols_by_value
                        .entry((state_id, SymbolType::NonTerminal))
                        .or_default()
                        .push(*symbol);
                }

                // Deterministic ordering of the grouped entries (the map's
                // iteration order is arbitrary).
                let mut values_with_symbols = symbols_by_value.drain().collect::<Vec<_>>();
                values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| {
                    (symbols.len(), *kind, *value, symbols[0])
                });

                // Each small state starts with its group count...
                add_line!(self, "[{}] = {},", index, values_with_symbols.len());
                indent!(self);

                // ...followed by, per group: the action/state, the symbol
                // count, then the sorted symbols themselves.
                for ((value, kind), symbols) in values_with_symbols.iter_mut() {
                    if *kind == SymbolType::NonTerminal {
                        add_line!(self, "STATE({}), {},", value, symbols.len());
                    } else {
                        add_line!(self, "ACTIONS({}), {},", value, symbols.len());
                    }

                    symbols.sort_unstable();
                    indent!(self);
                    for symbol in symbols {
                        add_line!(self, "{},", self.symbol_ids[symbol]);
                    }
                    dedent!(self);
                }

                dedent!(self);

                // Advance past this state's encoding: 1 count word plus, per
                // group, 2 header words and one word per symbol.
                index += 1 + values_with_symbols
                    .iter()
                    .map(|(_, symbols)| 2 + symbols.len())
                    .sum::<usize>();
            }

            dedent!(self);
            add_line!(self, "}};");
            add_line!(self, "");

            // Map each small parse state to its offset in the packed table.
            add_line!(
                self,
                "static const uint32_t ts_small_parse_table_map[] = {{"
            );
            indent!(self);
            for i in self.large_state_count..self.parse_table.states.len() {
                add_line!(
                    self,
                    "[SMALL_STATE({})] = {},",
                    i,
                    small_state_indices[i - self.large_state_count]
                );
            }
            dedent!(self);
            add_line!(self, "}};");
            add_line!(self, "");
        }

        // Flatten the dedup map into (index, entry) pairs ordered by index,
        // then emit the action list itself.
        let mut parse_table_entries: Vec<_> = parse_table_entries
            .into_iter()
            .map(|(entry, i)| (i, entry))
            .collect();
        parse_table_entries.sort_by_key(|(index, _)| *index);
        self.add_parse_action_list(parse_table_entries);
    }
|
|
|
|
|
|
|
|
|
|
    /// Emit the `ts_parse_actions` array: for each deduplicated entry, a
    /// header element carrying the action count and reusability flag,
    /// followed by one element per action.
    ///
    /// `parse_table_entries` must already be sorted by index (the caller,
    /// `add_parse_table`, guarantees this).
    fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
        add_line!(
            self,
            "static const TSParseActionEntry ts_parse_actions[] = {{"
        );
        indent!(self);
        for (i, entry) in parse_table_entries {
            // Header element at the entry's index; the actions follow it in
            // consecutive array slots.
            add!(
                self,
                " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
                i,
                entry.actions.len(),
                entry.reusable
            );
            for action in entry.actions {
                add!(self, " ");
                match action {
                    ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
                    ParseAction::Recover => add!(self, "RECOVER()"),
                    ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
                    ParseAction::Shift {
                        state,
                        is_repetition,
                    } => {
                        if is_repetition {
                            add!(self, "SHIFT_REPEAT({})", state);
                        } else {
                            add!(self, "SHIFT({})", state);
                        }
                    }
                    ParseAction::Reduce {
                        symbol,
                        child_count,
                        dynamic_precedence,
                        production_id,
                        ..
                    } => {
                        add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
                        // Optional designated-initializer arguments are only
                        // emitted when they differ from zero.
                        if dynamic_precedence != 0 {
                            add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
                        }
                        if production_id != 0 {
                            add!(self, ", .production_id = {}", production_id);
                        }
                        add!(self, ")");
                    }
                }
                add!(self, ",")
            }
            add!(self, "\n");
        }
        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");
    }
|
|
|
|
|
|
|
|
|
|
    /// Emit the exported `tree_sitter_<language>()` function that constructs
    /// and returns the static `TSLanguage` definition, plus the external
    /// scanner forward declarations it references.
    ///
    /// The whole export is wrapped in `extern "C"` so the generated parser
    /// can be compiled and linked together with C++ scanner code.
    fn add_parser_export(&mut self) {
        let language_function_name = format!("tree_sitter_{}", self.language_name);
        let external_scanner_name = format!("{}_external_scanner", language_function_name);

        add_line!(self, "#ifdef __cplusplus");
        add_line!(self, r#"extern "C" {{"#);
        add_line!(self, "#endif");

        // Forward-declare the external scanner entry points, which are
        // defined in the user's scanner source file.
        if !self.syntax_grammar.external_tokens.is_empty() {
            add_line!(self, "void *{}_create(void);", external_scanner_name);
            add_line!(self, "void {}_destroy(void *);", external_scanner_name);
            add_line!(
                self,
                "bool {}_scan(void *, TSLexer *, const bool *);",
                external_scanner_name
            );
            add_line!(
                self,
                "unsigned {}_serialize(void *, char *);",
                external_scanner_name
            );
            add_line!(
                self,
                "void {}_deserialize(void *, const char *, unsigned);",
                external_scanner_name
            );
            add_line!(self, "");
        }

        // On Windows, export the language function from the DLL.
        add_line!(self, "#ifdef _WIN32");
        add_line!(self, "#define extern __declspec(dllexport)");
        add_line!(self, "#endif");
        add_line!(self, "");

        add_line!(
            self,
            "extern const TSLanguage *{}(void) {{",
            language_function_name
        );
        indent!(self);
        add_line!(self, "static const TSLanguage language = {{");
        indent!(self);
        add_line!(self, ".version = LANGUAGE_VERSION,");

        // Quantities
        add_line!(self, ".symbol_count = SYMBOL_COUNT,");
        add_line!(self, ".alias_count = ALIAS_COUNT,");
        add_line!(self, ".token_count = TOKEN_COUNT,");
        add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");
        add_line!(self, ".state_count = STATE_COUNT,");
        add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
        add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
        add_line!(self, ".field_count = FIELD_COUNT,");
        add_line!(
            self,
            ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
        );

        // Parse table
        add_line!(self, ".parse_table = &ts_parse_table[0][0],");
        // The small parse tables only exist when some states were not
        // rendered as large states.
        if self.large_state_count < self.parse_table.states.len() {
            add_line!(self, ".small_parse_table = ts_small_parse_table,");
            add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,");
        }
        add_line!(self, ".parse_actions = ts_parse_actions,");

        // Metadata
        add_line!(self, ".symbol_names = ts_symbol_names,");
        if !self.field_names.is_empty() {
            add_line!(self, ".field_names = ts_field_names,");
            add_line!(self, ".field_map_slices = ts_field_map_slices,");
            add_line!(self, ".field_map_entries = ts_field_map_entries,");
        }
        add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
        add_line!(self, ".public_symbol_map = ts_symbol_map,");
        add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
        if !self.parse_table.production_infos.is_empty() {
            add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],");
        }

        // Lexing
        add_line!(self, ".lex_modes = ts_lex_modes,");
        add_line!(self, ".lex_fn = ts_lex,");
        if let Some(keyword_capture_token) = self.keyword_capture_token {
            add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
            add_line!(
                self,
                ".keyword_capture_token = {},",
                self.symbol_ids[&keyword_capture_token]
            );
        }

        // External scanner function table, present only when the grammar
        // declares external tokens.
        if !self.syntax_grammar.external_tokens.is_empty() {
            add_line!(self, ".external_scanner = {{");
            indent!(self);
            add_line!(self, "&ts_external_scanner_states[0][0],");
            add_line!(self, "ts_external_scanner_symbol_map,");
            add_line!(self, "{}_create,", external_scanner_name);
            add_line!(self, "{}_destroy,", external_scanner_name);
            add_line!(self, "{}_scan,", external_scanner_name);
            add_line!(self, "{}_serialize,", external_scanner_name);
            add_line!(self, "{}_deserialize,", external_scanner_name);
            dedent!(self);
            add_line!(self, "}},");
        }

        // Primary state ids are only part of newer ABI versions.
        if self.abi_version >= ABI_VERSION_WITH_PRIMARY_STATES {
            add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
        }

        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "return &language;");
        dedent!(self);
        add_line!(self, "}}");
        add_line!(self, "#ifdef __cplusplus");
        add_line!(self, "}}");
        add_line!(self, "#endif");
    }
|
|
|
|
|
|
|
|
|
|
fn get_parse_action_list_id(
|
|
|
|
|
&self,
|
|
|
|
|
entry: &ParseTableEntry,
|
2021-08-04 21:31:25 +02:00
|
|
|
parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
|
2018-12-23 10:16:03 -08:00
|
|
|
next_parse_action_list_index: &mut usize,
|
|
|
|
|
) -> usize {
|
2021-08-04 21:31:25 +02:00
|
|
|
if let Some(&index) = parse_table_entries.get(entry) {
|
|
|
|
|
index
|
|
|
|
|
} else {
|
|
|
|
|
let result = *next_parse_action_list_index;
|
|
|
|
|
parse_table_entries.insert(entry.clone(), result);
|
|
|
|
|
*next_parse_action_list_index += 1 + entry.actions.len();
|
|
|
|
|
result
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
fn get_field_map_id(
|
|
|
|
|
&self,
|
|
|
|
|
flat_field_map: &Vec<(String, FieldLocation)>,
|
|
|
|
|
flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
|
|
|
|
|
next_flat_field_map_index: &mut usize,
|
|
|
|
|
) -> usize {
|
|
|
|
|
if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
|
|
|
|
|
return *index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result = *next_flat_field_map_index;
|
|
|
|
|
flat_field_maps.push((result, flat_field_map.clone()));
|
|
|
|
|
*next_flat_field_map_index += flat_field_map.len();
|
|
|
|
|
result
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
/// Build the C identifier used for an external token's entry in the
/// generated external-token enum.
fn external_token_id(&self, token: &ExternalToken) -> String {
    let mut id = String::from("ts_external_token_");
    id.push_str(&self.sanitize_identifier(&token.name));
    id
}
|
|
|
|
|
|
|
|
|
|
fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
|
2018-12-20 13:36:39 -08:00
|
|
|
let mut id;
|
|
|
|
|
if symbol == Symbol::end() {
|
|
|
|
|
id = "ts_builtin_sym_end".to_string();
|
|
|
|
|
} else {
|
|
|
|
|
let (name, kind) = self.metadata_for_symbol(symbol);
|
|
|
|
|
id = match kind {
|
2018-12-23 10:16:03 -08:00
|
|
|
VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
|
|
|
|
|
VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
|
2018-12-20 13:36:39 -08:00
|
|
|
VariableType::Hidden | VariableType::Named => {
|
2018-12-23 10:16:03 -08:00
|
|
|
format!("sym_{}", self.sanitize_identifier(name))
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let mut suffix_number = 1;
|
|
|
|
|
let mut suffix = String::new();
|
2018-12-23 10:16:03 -08:00
|
|
|
while used_identifiers.contains(&id) {
|
2018-12-20 13:36:39 -08:00
|
|
|
id.drain(id.len() - suffix.len()..);
|
|
|
|
|
suffix_number += 1;
|
|
|
|
|
suffix = suffix_number.to_string();
|
|
|
|
|
id += &suffix;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
used_identifiers.insert(id.clone());
|
2018-12-20 13:36:39 -08:00
|
|
|
self.symbol_ids.insert(symbol, id);
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-07 12:29:20 -08:00
|
|
|
fn field_id(&self, field_name: &String) -> String {
|
2019-02-08 16:06:29 -08:00
|
|
|
format!("field_{}", field_name)
|
2019-02-07 12:29:20 -08:00
|
|
|
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
|
|
|
|
|
match symbol.kind {
|
2021-02-18 15:43:01 -08:00
|
|
|
SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
|
2018-12-20 13:36:39 -08:00
|
|
|
SymbolType::NonTerminal => {
|
|
|
|
|
let variable = &self.syntax_grammar.variables[symbol.index];
|
|
|
|
|
(&variable.name, variable.kind)
|
|
|
|
|
}
|
|
|
|
|
SymbolType::Terminal => {
|
|
|
|
|
let variable = &self.lexical_grammar.variables[symbol.index];
|
|
|
|
|
(&variable.name, variable.kind)
|
|
|
|
|
}
|
|
|
|
|
SymbolType::External => {
|
|
|
|
|
let token = &self.syntax_grammar.external_tokens[symbol.index];
|
|
|
|
|
(&token.name, token.kind)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
fn sanitize_identifier(&self, name: &str) -> String {
|
|
|
|
|
let mut result = String::with_capacity(name.len());
|
|
|
|
|
for c in name.chars() {
|
|
|
|
|
if ('a' <= c && c <= 'z')
|
|
|
|
|
|| ('A' <= c && c <= 'Z')
|
|
|
|
|
|| ('0' <= c && c <= '9')
|
|
|
|
|
|| c == '_'
|
|
|
|
|
{
|
|
|
|
|
result.push(c);
|
|
|
|
|
} else {
|
2019-01-02 12:34:40 -08:00
|
|
|
let replacement = match c {
|
2018-12-23 10:16:03 -08:00
|
|
|
'~' => "TILDE",
|
|
|
|
|
'`' => "BQUOTE",
|
|
|
|
|
'!' => "BANG",
|
|
|
|
|
'@' => "AT",
|
|
|
|
|
'#' => "POUND",
|
|
|
|
|
'$' => "DOLLAR",
|
|
|
|
|
'%' => "PERCENT",
|
|
|
|
|
'^' => "CARET",
|
|
|
|
|
'&' => "AMP",
|
|
|
|
|
'*' => "STAR",
|
|
|
|
|
'(' => "LPAREN",
|
|
|
|
|
')' => "RPAREN",
|
|
|
|
|
'-' => "DASH",
|
|
|
|
|
'+' => "PLUS",
|
|
|
|
|
'=' => "EQ",
|
|
|
|
|
'{' => "LBRACE",
|
|
|
|
|
'}' => "RBRACE",
|
|
|
|
|
'[' => "LBRACK",
|
|
|
|
|
']' => "RBRACK",
|
|
|
|
|
'\\' => "BSLASH",
|
|
|
|
|
'|' => "PIPE",
|
|
|
|
|
':' => "COLON",
|
|
|
|
|
';' => "SEMI",
|
|
|
|
|
'"' => "DQUOTE",
|
|
|
|
|
'\'' => "SQUOTE",
|
|
|
|
|
'<' => "LT",
|
|
|
|
|
'>' => "GT",
|
|
|
|
|
',' => "COMMA",
|
|
|
|
|
'.' => "DOT",
|
|
|
|
|
'?' => "QMARK",
|
|
|
|
|
'/' => "SLASH",
|
|
|
|
|
'\n' => "LF",
|
|
|
|
|
'\r' => "CR",
|
|
|
|
|
'\t' => "TAB",
|
|
|
|
|
_ => continue,
|
2019-01-02 12:34:40 -08:00
|
|
|
};
|
|
|
|
|
if !result.is_empty() && !result.ends_with("_") {
|
|
|
|
|
result.push('_');
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
2019-01-02 12:34:40 -08:00
|
|
|
result += replacement;
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
result
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
fn sanitize_string(&self, name: &str) -> String {
|
|
|
|
|
let mut result = String::with_capacity(name.len());
|
|
|
|
|
for c in name.chars() {
|
2019-01-11 17:43:27 -08:00
|
|
|
match c {
|
|
|
|
|
'\"' => result += "\\\"",
|
2020-09-23 13:06:06 -07:00
|
|
|
'?' => result += "\\?",
|
2019-01-11 17:43:27 -08:00
|
|
|
'\\' => result += "\\\\",
|
2019-11-13 10:54:34 -08:00
|
|
|
'\u{000c}' => result += "\\f",
|
2019-01-12 21:42:31 -08:00
|
|
|
'\n' => result += "\\n",
|
|
|
|
|
'\r' => result += "\\r",
|
2019-11-13 10:54:34 -08:00
|
|
|
'\t' => result += "\\t",
|
2019-01-11 17:43:27 -08:00
|
|
|
_ => result.push(c),
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
result
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
2019-01-02 12:34:40 -08:00
|
|
|
|
|
|
|
|
fn add_character(&mut self, c: char) {
|
2019-11-13 10:54:34 -08:00
|
|
|
match c {
|
|
|
|
|
'\'' => add!(self, "'\\''"),
|
|
|
|
|
'\\' => add!(self, "'\\\\'"),
|
|
|
|
|
'\u{000c}' => add!(self, "'\\f'"),
|
|
|
|
|
'\n' => add!(self, "'\\n'"),
|
|
|
|
|
'\t' => add!(self, "'\\t'"),
|
|
|
|
|
'\r' => add!(self, "'\\r'"),
|
|
|
|
|
_ => {
|
|
|
|
|
if c == ' ' || c.is_ascii_graphic() {
|
|
|
|
|
add!(self, "'{}'", c)
|
|
|
|
|
} else {
|
|
|
|
|
add!(self, "{}", c as u32)
|
|
|
|
|
}
|
2019-01-02 12:34:40 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
2018-12-05 12:50:12 -08:00
|
|
|
|
2019-08-28 17:14:04 -07:00
|
|
|
/// Returns a String of C code for the given components of a parser.
|
|
|
|
|
///
|
|
|
|
|
/// # Arguments
|
|
|
|
|
///
|
|
|
|
|
/// * `name` - A string slice containing the name of the language
|
|
|
|
|
/// * `parse_table` - The generated parse table for the language
|
|
|
|
|
/// * `main_lex_table` - The generated lexing table for the language
|
|
|
|
|
/// * `keyword_lex_table` - The generated keyword lexing table for the language
|
|
|
|
|
/// * `keyword_capture_token` - A symbol indicating which token is used
|
|
|
|
|
/// for keyword capture, if any.
|
|
|
|
|
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
|
|
|
|
|
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
|
2020-10-27 15:46:09 -07:00
|
|
|
/// * `default_aliases` - A map describing the global rename rules that should apply.
|
2019-08-28 17:14:04 -07:00
|
|
|
/// the keys are symbols that are *always* aliased in the same way, and the values
|
|
|
|
|
/// are the aliases that are applied to those symbols.
|
2022-01-17 14:45:07 -08:00
|
|
|
/// * `abi_version` - The language ABI version that should be generated. Usually
|
|
|
|
|
/// you want Tree-sitter's current version, but right after making an ABI
|
|
|
|
|
/// change, it may be useful to generate code with the previous ABI.
|
2018-12-06 22:11:52 -08:00
|
|
|
pub(crate) fn render_c_code(
|
2018-12-05 12:50:12 -08:00
|
|
|
name: &str,
|
|
|
|
|
parse_table: ParseTable,
|
|
|
|
|
main_lex_table: LexTable,
|
|
|
|
|
keyword_lex_table: LexTable,
|
|
|
|
|
keyword_capture_token: Option<Symbol>,
|
|
|
|
|
syntax_grammar: SyntaxGrammar,
|
|
|
|
|
lexical_grammar: LexicalGrammar,
|
2020-10-27 15:46:09 -07:00
|
|
|
default_aliases: AliasMap,
|
2022-01-17 14:45:07 -08:00
|
|
|
abi_version: usize,
|
2018-12-05 12:50:12 -08:00
|
|
|
) -> String {
|
2022-01-17 14:45:07 -08:00
|
|
|
if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) {
|
|
|
|
|
panic!(
|
|
|
|
|
"This version of Tree-sitter can only generate parsers with ABI version {} - {}, not {}",
|
|
|
|
|
ABI_VERSION_MIN, ABI_VERSION_MAX, abi_version
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
Generator {
|
|
|
|
|
buffer: String::new(),
|
|
|
|
|
indent_level: 0,
|
|
|
|
|
language_name: name.to_string(),
|
2019-05-16 16:59:50 -07:00
|
|
|
large_state_count: 0,
|
2018-12-20 13:36:39 -08:00
|
|
|
parse_table,
|
|
|
|
|
main_lex_table,
|
|
|
|
|
keyword_lex_table,
|
|
|
|
|
keyword_capture_token,
|
|
|
|
|
syntax_grammar,
|
|
|
|
|
lexical_grammar,
|
2020-10-27 15:46:09 -07:00
|
|
|
default_aliases,
|
2018-12-20 13:36:39 -08:00
|
|
|
symbol_ids: HashMap::new(),
|
2019-05-16 16:59:50 -07:00
|
|
|
symbol_order: HashMap::new(),
|
2018-12-23 10:16:03 -08:00
|
|
|
alias_ids: HashMap::new(),
|
2020-08-21 14:12:04 -07:00
|
|
|
symbol_map: HashMap::new(),
|
|
|
|
|
unique_aliases: Vec::new(),
|
2019-02-07 12:29:20 -08:00
|
|
|
field_names: Vec::new(),
|
2022-01-17 14:45:07 -08:00
|
|
|
abi_version,
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
|
|
|
|
.generate()
|
2018-12-05 12:50:12 -08:00
|
|
|
}
|