2019-01-07 10:23:01 -08:00
|
|
|
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
|
|
|
|
|
use super::nfa::CharacterSet;
|
2019-05-16 16:27:05 -07:00
|
|
|
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
2019-02-07 17:18:33 -08:00
|
|
|
use super::tables::{
|
2019-10-21 13:31:49 -07:00
|
|
|
AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
|
|
|
|
|
ParseTableEntry,
|
2019-02-07 17:18:33 -08:00
|
|
|
};
|
2019-01-02 12:34:40 -08:00
|
|
|
use core::ops::Range;
|
2019-05-16 17:19:44 -07:00
|
|
|
use std::cmp;
|
2019-08-29 15:26:05 -07:00
|
|
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
2018-12-20 13:36:39 -08:00
|
|
|
use std::fmt::Write;
|
2018-12-23 10:16:03 -08:00
|
|
|
use std::mem::swap;
|
2019-08-28 17:14:04 -07:00
|
|
|
|
|
|
|
|
// Currently, the library supports a new ABI version that has not yet been
// stabilized, and the parser generation does not use it by default.
// Generated parsers therefore declare the previous (stable) version unless
// `next_abi` is set on the `Generator`.
const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1;
|
2018-12-23 10:16:03 -08:00
|
|
|
|
|
|
|
|
// Append formatted text to the generator's output buffer *without* any
// leading indentation or trailing newline. Writing to a `String` cannot
// fail, so the `unwrap` can only trip on a formatting bug.
macro_rules! add {
    ($this: tt, $($arg: tt)*) => {{
        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
    }}
}
|
2018-12-20 13:36:39 -08:00
|
|
|
|
2019-01-02 12:34:40 -08:00
|
|
|
// Append the current indentation prefix (one two-space unit per
// `indent_level`) to the output buffer.
macro_rules! add_whitespace {
    ($this: tt) => {{
        for _ in 0..$this.indent_level {
            write!(&mut $this.buffer, "  ").unwrap();
        }
    }};
}
|
|
|
|
|
|
|
|
|
|
// Append a complete line to the output buffer: current indentation,
// the formatted text, then a newline.
macro_rules! add_line {
    ($this: tt, $($arg: tt)*) => {
        add_whitespace!($this);
        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
        $this.buffer += "\n";
    }
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
// Increase the indentation level for subsequent `add_line!` calls.
macro_rules! indent {
    ($this: tt) => {
        $this.indent_level += 1;
    };
}
|
|
|
|
|
|
|
|
|
|
// Decrease the indentation level. The assertion catches unbalanced
// `indent!` / `dedent!` pairs, which would otherwise underflow.
macro_rules! dedent {
    ($this: tt) => {
        assert_ne!($this.indent_level, 0);
        $this.indent_level -= 1;
    };
}
|
|
|
|
|
|
2019-08-30 20:29:55 -07:00
|
|
|
// Parse states whose entry count is at most this threshold are eligible for
// the compact "small state" encoding (only used when `next_abi` is enabled;
// see `Generator::init`).
const SMALL_STATE_THRESHOLD: usize = 64;
|
2019-05-16 16:59:50 -07:00
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
// Renders a complete C source file for a parser from the computed parse and
// lex tables. All output is accumulated in `buffer` via the `add*` macros.
struct Generator {
    buffer: String,                      // accumulated C source text
    indent_level: usize,                 // current indentation depth for add_line!
    language_name: String,
    parse_table: ParseTable,
    main_lex_table: LexTable,
    keyword_lex_table: LexTable,
    large_state_count: usize,            // states using the full (non-"small") encoding
    keyword_capture_token: Option<Symbol>,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    simple_aliases: AliasMap,
    symbol_order: HashMap<Symbol, usize>, // numeric id assigned to each symbol
    symbol_ids: HashMap<Symbol, String>,  // C identifier for each symbol
    alias_ids: HashMap<Alias, String>,    // C identifier for each alias
    alias_map: BTreeMap<Alias, Option<Symbol>>, // alias -> matching symbol, if any
    field_names: Vec<String>,             // sorted, deduplicated field names
    next_abi: bool,                       // opt in to the unreleased ABI version
}
|
|
|
|
|
|
|
|
|
|
impl Generator {
|
|
|
|
|
// Render the entire C source file and return it. The order of the calls
// below mirrors the layout of the generated file, so it must not change.
fn generate(mut self) -> String {
    self.init();
    self.add_includes();
    self.add_pragmas();
    self.add_stats();
    self.add_symbol_enum();
    self.add_symbol_names_list();

    // The symbol de-duplication map only exists in the new ABI.
    if self.next_abi {
        self.add_unique_symbol_map();
    }

    self.add_symbol_metadata_list();

    if !self.field_names.is_empty() {
        self.add_field_name_enum();
        self.add_field_name_names_list();
        self.add_field_sequences();
    }

    if !self.parse_table.production_infos.is_empty() {
        self.add_alias_sequences();
    }

    // Move the lex table out of `self` so it can be consumed by value while
    // `self` is still mutably borrowed by the rendering call.
    let mut main_lex_table = LexTable::default();
    swap(&mut main_lex_table, &mut self.main_lex_table);
    self.add_lex_function("ts_lex", main_lex_table);

    // A second lex function is only emitted for grammars with keyword
    // extraction enabled.
    if self.keyword_capture_token.is_some() {
        let mut keyword_lex_table = LexTable::default();
        swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
        self.add_lex_function("ts_lex_keywords", keyword_lex_table);
    }

    self.add_lex_modes_list();

    if !self.syntax_grammar.external_tokens.is_empty() {
        self.add_external_token_enum();
        self.add_external_scanner_symbol_map();
        self.add_external_scanner_states_list();
    }

    self.add_parse_table();
    self.add_parser_export();

    self.buffer
}
|
|
|
|
|
|
2019-02-07 12:29:20 -08:00
|
|
|
// Precompute the naming and sizing state used by the rendering methods:
// C identifiers for symbols and aliases, the sorted field-name list, and
// (under the new ABI) how many parse states use the large encoding.
fn init(&mut self) {
    // Assign a unique C identifier to every symbol; the set tracks
    // identifiers already taken so collisions can be resolved.
    let mut symbol_identifiers = HashSet::new();
    for i in 0..self.parse_table.symbols.len() {
        self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
    }

    let mut field_names = Vec::new();
    for production_info in &self.parse_table.production_infos {
        for field_name in production_info.field_map.keys() {
            field_names.push(field_name);
        }

        for alias in &production_info.alias_sequence {
            if let Some(alias) = &alias {
                let alias_kind = alias.kind();
                // If some real symbol has the same name and kind as this
                // alias, the alias can reuse that symbol's identifier.
                let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
                    let (name, kind) = self.metadata_for_symbol(*symbol);
                    name == alias.value && kind == alias_kind
                });
                let alias_id = if let Some(symbol) = matching_symbol {
                    self.symbol_ids[&symbol].clone()
                } else if alias.is_named {
                    format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
                } else {
                    format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
                };
                self.alias_ids.entry(alias.clone()).or_insert(alias_id);
                self.alias_map
                    .entry(alias.clone())
                    .or_insert(matching_symbol);
            }
        }
    }

    // Sort + dedup so field ids are deterministic across runs.
    field_names.sort_unstable();
    field_names.dedup();
    self.field_names = field_names.into_iter().cloned().collect();

    // If we are opting in to the new unstable language ABI, then use the concept of
    // "small parse states". Otherwise, use the same representation for all parse
    // states.
    if self.next_abi {
        let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
        // States are assumed ordered largest-first after the first two; the
        // first two states are always kept large (`*i <= 1`).
        self.large_state_count = self
            .parse_table
            .states
            .iter()
            .enumerate()
            .take_while(|(i, s)| {
                *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
            })
            .count();
    } else {
        self.large_state_count = self.parse_table.states.len();
    }
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
// Emit the single include that every generated parser needs.
fn add_includes(&mut self) {
    add_line!(self, "#include <tree_sitter/parser.h>");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
// Emit compiler pragmas: silence the missing-field-initializers warning that
// designated initializers trigger, and optionally disable optimization for
// very large lexers.
fn add_pragmas(&mut self) {
    add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
    add_line!(self, "#pragma GCC diagnostic push");
    add_line!(
        self,
        "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
    );
    add_line!(self, "#endif");
    add_line!(self, "");

    // Compiling large lexer functions can be very slow. Disabling optimizations
    // is not ideal, but only a very small fraction of overall parse time is
    // spent lexing, so the performance impact of this is negligible.
    if self.main_lex_table.states.len() > 300 {
        add_line!(self, "#ifdef _MSC_VER");
        add_line!(self, "#pragma optimize(\"\", off)");
        add_line!(self, "#elif defined(__clang__)");
        add_line!(self, "#pragma clang optimize off");
        add_line!(self, "#elif defined(__GNUC__)");
        add_line!(self, "#pragma GCC optimize (\"O0\")");
        add_line!(self, "#endif");
        add_line!(self, "");
    }
}
|
|
|
|
|
|
|
|
|
|
// Emit the `#define` constants that size the generated tables
// (state/symbol/token/field counts and the ABI version).
fn add_stats(&mut self) {
    // Count distinct tokens: terminals, EOF, and external tokens that do
    // not shadow an internal token (those are counted via their internal
    // counterpart instead).
    let token_count = self
        .parse_table
        .symbols
        .iter()
        .filter(|symbol| {
            if symbol.is_terminal() || symbol.is_eof() {
                true
            } else if symbol.is_external() {
                self.syntax_grammar.external_tokens[symbol.index]
                    .corresponding_internal_token
                    .is_none()
            } else {
                false
            }
        })
        .count();

    if self.next_abi {
        add_line!(
            self,
            "#define LANGUAGE_VERSION {}",
            tree_sitter::LANGUAGE_VERSION
        );
    } else {
        add_line!(self, "#define LANGUAGE_VERSION {}", STABLE_LANGUAGE_VERSION);
    }

    add_line!(
        self,
        "#define STATE_COUNT {}",
        self.parse_table.states.len()
    );

    // LARGE_STATE_COUNT only exists in the new ABI.
    if self.next_abi {
        add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
    }

    add_line!(
        self,
        "#define SYMBOL_COUNT {}",
        self.parse_table.symbols.len()
    );
    add_line!(
        self,
        "#define ALIAS_COUNT {}",
        // Only aliases with no matching real symbol get their own slot.
        self.alias_map.iter().filter(|e| e.1.is_none()).count()
    );
    add_line!(self, "#define TOKEN_COUNT {}", token_count);
    add_line!(
        self,
        "#define EXTERNAL_TOKEN_COUNT {}",
        self.syntax_grammar.external_tokens.len()
    );
    add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
    add_line!(
        self,
        "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
        self.parse_table.max_aliased_production_length
    );
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
// Emit the anonymous C enum that assigns a numeric value to every symbol
// and to every alias without a matching symbol. Also records the chosen
// ordering in `self.symbol_order` for later use.
fn add_symbol_enum(&mut self) {
    add_line!(self, "enum {{");
    indent!(self);
    // The end-of-input symbol is always 0 and is omitted from the enum body.
    self.symbol_order.insert(Symbol::end(), 0);
    let mut i = 1;
    for symbol in self.parse_table.symbols.iter() {
        if *symbol != Symbol::end() {
            self.symbol_order.insert(*symbol, i);
            add_line!(self, "{} = {},", self.symbol_ids[&symbol], i);
            i += 1;
        }
    }
    // Aliases that don't map to an existing symbol continue the numbering.
    for (alias, symbol) in &self.alias_map {
        if symbol.is_none() {
            add_line!(self, "{} = {},", self.alias_ids[&alias], i);
            i += 1;
        }
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
// Emit `ts_symbol_names`: the human-readable name for each symbol id.
// Simple-aliased symbols display their alias's name instead of their own.
fn add_symbol_names_list(&mut self) {
    add_line!(self, "static const char *ts_symbol_names[] = {{");
    indent!(self);
    for symbol in self.parse_table.symbols.iter() {
        let name = self.sanitize_string(
            self.simple_aliases
                .get(symbol)
                .map(|alias| alias.value.as_str())
                .unwrap_or(self.metadata_for_symbol(*symbol).0),
        );
        add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name);
    }
    // Aliases with their own ids get their own name entries.
    for (alias, symbol) in &self.alias_map {
        if symbol.is_none() {
            add_line!(
                self,
                "[{}] = \"{}\",",
                self.alias_ids[&alias],
                self.sanitize_string(&alias.value)
            );
        }
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
// Emit `ts_symbol_map`: maps each internal symbol id to the single
// public-facing id used for symbols that present identically to users.
fn add_unique_symbol_map(&mut self) {
    add_line!(self, "static TSSymbol ts_symbol_map[] = {{");
    indent!(self);
    for symbol in &self.parse_table.symbols {
        let mut mapping = symbol;

        // There can be multiple symbols in the grammar that have the same name and kind,
        // due to simple aliases. When that happens, ensure that they map to the same
        // public-facing symbol. If one of the symbols is not aliased, choose that one
        // to be the public-facing symbol. Otherwise, pick the symbol with the lowest
        // numeric value.
        if let Some(alias) = self.simple_aliases.get(symbol) {
            let kind = alias.kind();
            for other_symbol in &self.parse_table.symbols {
                if let Some(other_alias) = self.simple_aliases.get(other_symbol) {
                    if other_symbol < mapping && other_alias == alias {
                        mapping = other_symbol;
                    }
                } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
                    mapping = other_symbol;
                    break;
                }
            }
        }
        // Two anonymous tokens with different flags but the same string value
        // should be represented with the same symbol in the public API. Examples:
        // * "<" and token(prec(1, "<"))
        // * "(" and token.immediate("(")
        else if symbol.is_terminal() {
            let metadata = self.metadata_for_symbol(*symbol);
            // The first matching symbol in table order wins (possibly itself).
            for other_symbol in &self.parse_table.symbols {
                let other_metadata = self.metadata_for_symbol(*other_symbol);
                if other_metadata == metadata {
                    mapping = other_symbol;
                    break;
                }
            }
        }

        add_line!(
            self,
            "[{}] = {},",
            self.symbol_ids[&symbol],
            self.symbol_ids[mapping],
        );
    }

    // Stand-alone aliases always map to themselves.
    for (alias, symbol) in &self.alias_map {
        if symbol.is_none() {
            add_line!(
                self,
                "[{}] = {},",
                self.alias_ids[&alias],
                self.alias_ids[&alias],
            );
        }
    }

    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-02-07 12:29:20 -08:00
|
|
|
// Emit the C enum of field ids. Numbering starts at 1 because field id 0 is
// reserved for "no field" (see `ts_field_names[0] = NULL`).
fn add_field_name_enum(&mut self) {
    add_line!(self, "enum {{");
    indent!(self);
    for (i, field_name) in self.field_names.iter().enumerate() {
        add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
// Emit `ts_field_names`: the display name for each field id, with id 0
// reserved as NULL ("no field").
fn add_field_name_names_list(&mut self) {
    add_line!(self, "static const char *ts_field_names[] = {{");
    indent!(self);
    add_line!(self, "[0] = NULL,");
    for field_name in &self.field_names {
        add_line!(
            self,
            "[{}] = \"{}\",",
            self.field_id(field_name),
            field_name
        );
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
// Emit `ts_symbol_metadata`: visibility/namedness flags for every symbol
// and for every stand-alone alias.
fn add_symbol_metadata_list(&mut self) {
    add_line!(
        self,
        "static const TSSymbolMetadata ts_symbol_metadata[] = {{"
    );
    indent!(self);
    for symbol in &self.parse_table.symbols {
        add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]);
        indent!(self);
        // A simple alias overrides the symbol's own metadata: the aliased
        // form is always visible, and namedness comes from the alias.
        if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) {
            add_line!(self, ".visible = true,");
            add_line!(self, ".named = {},", is_named);
        } else {
            match self.metadata_for_symbol(*symbol).1 {
                VariableType::Named => {
                    add_line!(self, ".visible = true,");
                    add_line!(self, ".named = true,");
                }
                VariableType::Anonymous => {
                    add_line!(self, ".visible = true,");
                    add_line!(self, ".named = false,");
                }
                VariableType::Hidden => {
                    add_line!(self, ".visible = false,");
                    add_line!(self, ".named = true,");
                }
                VariableType::Auxiliary => {
                    add_line!(self, ".visible = false,");
                    add_line!(self, ".named = false,");
                }
            }
        }
        dedent!(self);
        add_line!(self, "}},");
    }
    // Stand-alone aliases are always visible; namedness comes from the alias.
    for (alias, matching_symbol) in &self.alias_map {
        if matching_symbol.is_none() {
            add_line!(self, "[{}] = {{", self.alias_ids[&alias]);
            indent!(self);
            add_line!(self, ".visible = true,");
            add_line!(self, ".named = {},", alias.is_named);
            dedent!(self);
            add_line!(self, "}},");
        }
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
// Emit `ts_alias_sequences`: for each production, which alias (if any)
// applies to each child position. Rows for productions with no aliases are
// omitted (the whole array is zero-initialized).
fn add_alias_sequences(&mut self) {
    add_line!(
        self,
        "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
        self.parse_table.production_infos.len()
    );
    indent!(self);
    for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
        if production_info.alias_sequence.is_empty() {
            // Work around MSVC's intolerance of empty array initializers by
            // explicitly zero-initializing the first element.
            if i == 0 {
                add_line!(self, "[0] = {{0}},");
            }
            continue;
        }

        add_line!(self, "[{}] = {{", i);
        indent!(self);
        for (j, alias) in production_info.alias_sequence.iter().enumerate() {
            if let Some(alias) = alias {
                add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
            }
        }
        dedent!(self);
        add_line!(self, "}},");
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
2019-02-07 12:29:20 -08:00
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
// Emit the two field tables: `ts_field_map_slices` (per production, an
// index+length into the entries table) and `ts_field_map_entries` (the
// flattened (field id, child index, inherited) triples).
fn add_field_sequences(&mut self) {
    let mut flat_field_maps = vec![];
    let mut next_flat_field_map_index = 0;
    // Reserve row 0 for the empty field map so productions without fields
    // can share slice id 0.
    self.get_field_map_id(
        &Vec::new(),
        &mut flat_field_maps,
        &mut next_flat_field_map_index,
    );

    // Flatten each production's field map and deduplicate identical maps
    // via `get_field_map_id`.
    let mut field_map_ids = Vec::new();
    for production_info in &self.parse_table.production_infos {
        if !production_info.field_map.is_empty() {
            let mut flat_field_map = Vec::new();
            for (field_name, locations) in &production_info.field_map {
                for location in locations {
                    flat_field_map.push((field_name.clone(), *location));
                }
            }
            field_map_ids.push((
                self.get_field_map_id(
                    &flat_field_map,
                    &mut flat_field_maps,
                    &mut next_flat_field_map_index,
                ),
                flat_field_map.len(),
            ));
        } else {
            field_map_ids.push((0, 0));
        }
    }

    add_line!(
        self,
        "static const TSFieldMapSlice ts_field_map_slices[{}] = {{",
        self.parse_table.production_infos.len(),
    );
    indent!(self);
    for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
        // Productions with no fields are left zero-initialized.
        if length > 0 {
            add_line!(
                self,
                "[{}] = {{.index = {}, .length = {}}},",
                production_id,
                row_id,
                length
            );
        }
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");

    add_line!(
        self,
        "static const TSFieldMapEntry ts_field_map_entries[] = {{",
    );
    indent!(self);
    // Skip row 0: it is the reserved empty map.
    for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
        add_line!(self, "[{}] =", row_index);
        indent!(self);
        for (field_name, location) in field_pairs {
            add_whitespace!(self);
            add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
            if location.inherited {
                add!(self, ", .inherited = true");
            }
            add!(self, "}},\n");
        }
        dedent!(self);
    }

    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
// Emit one C lexing function (`ts_lex` or `ts_lex_keywords`) as a switch
// over the lex states of the given table.
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
    add_line!(
        self,
        "static bool {}(TSLexer *lexer, TSStateId state) {{",
        name
    );
    indent!(self);
    add_line!(self, "START_LEXER();");

    // The new ABI exposes an explicit eof callback; the old ABI relied on a
    // zero lookahead character to signal end of input.
    if self.next_abi {
        add_line!(self, "eof = lexer->eof(lexer);");
    } else {
        add_line!(self, "eof = lookahead == 0;");
    }

    add_line!(self, "switch (state) {{");
    indent!(self);

    for (i, state) in lex_table.states.into_iter().enumerate() {
        add_line!(self, "case {}:", i);
        indent!(self);
        self.add_lex_state(state);
        dedent!(self);
    }

    add_line!(self, "default:");
    indent!(self);
    add_line!(self, "return false;");
    dedent!(self);

    dedent!(self);
    add_line!(self, "}}");
    dedent!(self);
    add_line!(self, "}}");
    add_line!(self, "");
}
|
|
|
|
|
|
2019-06-19 21:36:01 -07:00
|
|
|
// Emit the body of one `case` in a lex function: the accept action (if any),
// an eof transition (if any), then one `if (...)` advance per character set.
fn add_lex_state(&mut self, state: LexState) {
    if let Some(accept_action) = state.accept_action {
        add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
    }

    if let Some(eof_action) = state.eof_action {
        add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
    }

    // Characters already matched by earlier conditions in this state never
    // reach later conditions, so they can be omitted from those conditions.
    let mut ruled_out_characters = HashSet::new();
    for (characters, action) in state.advance_actions {
        // Remember the buffer length so a condition that turns out to be
        // vacuously true (matches everything remaining) can be rolled back.
        let previous_length = self.buffer.len();

        add_whitespace!(self);
        add!(self, "if (");
        if self.add_character_set_condition(&characters, &ruled_out_characters) {
            add!(self, ") ");
            self.add_advance_action(&action);
            if let CharacterSet::Include(chars) = characters {
                ruled_out_characters.extend(chars.iter().map(|c| *c as u32));
            }
        } else {
            // Nothing was emitted for the condition: drop the "if (" text
            // and emit the action unconditionally.
            self.buffer.truncate(previous_length);
            self.add_advance_action(&action);
        }
        add!(self, "\n");
    }

    add_line!(self, "END_STATE();");
}
|
|
|
|
|
|
|
|
|
|
// Emit a C boolean expression testing `lookahead` against the given
// character set, skipping characters already ruled out. Returns whether any
// condition text was actually written.
fn add_character_set_condition(
    &mut self,
    characters: &CharacterSet,
    ruled_out_characters: &HashSet<u32>,
) -> bool {
    match characters {
        CharacterSet::Include(chars) => {
            let ranges = Self::get_ranges(chars, ruled_out_characters);
            self.add_character_range_conditions(ranges, false)
        }
        CharacterSet::Exclude(chars) => {
            // The NUL character is always excluded (it signals eof in the
            // old ABI), so prepend it to the excluded ranges.
            let ranges = Some('\0'..'\0')
                .into_iter()
                .chain(Self::get_ranges(chars, ruled_out_characters));
            self.add_character_range_conditions(ranges, true)
        }
    }
}
|
|
|
|
|
|
|
|
|
|
// Emit the comparison chain for a sequence of character ranges. Ranges are
// inclusive on both ends here (`start..end` with `end` included). When
// `is_negated`, conditions are joined with `&&` and use `!=`/`<`-style
// exclusion; otherwise they're joined with `||` and use `==`/`<=` inclusion.
// Returns whether anything was written.
fn add_character_range_conditions(
    &mut self,
    ranges: impl Iterator<Item = Range<char>>,
    is_negated: bool,
) -> bool {
    let line_break = "\n      ";
    let mut did_add = false;
    for range in ranges {
        if is_negated {
            if did_add {
                add!(self, " &&{}", line_break);
            }
            if range.end == range.start {
                // Single character: one inequality.
                add!(self, "lookahead != ");
                self.add_character(range.start);
            } else if range.end as u32 == range.start as u32 + 1 {
                // Two adjacent characters: two inequalities beat a range check.
                add!(self, "lookahead != ");
                self.add_character(range.start);
                add!(self, " &&{}lookahead != ", line_break);
                self.add_character(range.end);
            } else {
                // General exclusion: outside [start, end].
                add!(self, "(lookahead < ");
                self.add_character(range.start);
                add!(self, " || ");
                self.add_character(range.end);
                add!(self, " < lookahead)");
            }
        } else {
            if did_add {
                add!(self, " ||{}", line_break);
            }
            if range.end == range.start {
                add!(self, "lookahead == ");
                self.add_character(range.start);
            } else if range.end as u32 == range.start as u32 + 1 {
                add!(self, "lookahead == ");
                self.add_character(range.start);
                add!(self, " ||{}lookahead == ", line_break);
                self.add_character(range.end);
            } else {
                // General inclusion: inside [start, end].
                add!(self, "(");
                self.add_character(range.start);
                add!(self, " <= lookahead && lookahead <= ");
                self.add_character(range.end);
                add!(self, ")");
            }
        }
        did_add = true;
    }
    did_add
}
|
|
|
|
|
|
|
|
|
|
// Collapse a sorted list of characters into an iterator of ranges, where
// each `Range<char>` is *inclusive* on both ends (start..end with `end`
// included — the convention used by `add_character_range_conditions`).
// Characters whose codepoints appear in `ruled_out_characters` are skipped,
// and a gap consisting only of ruled-out characters does NOT split a range.
//
// Takes a slice rather than `&Vec<char>` so any contiguous char sequence
// can be passed; existing `&Vec<char>` callers coerce automatically.
fn get_ranges<'a>(
    chars: &'a [char],
    ruled_out_characters: &'a HashSet<u32>,
) -> impl Iterator<Item = Range<char>> + 'a {
    let mut prev_range: Option<Range<char>> = None;
    chars
        .iter()
        .map(|c| (*c, false))
        // Append a sentinel so the final pending range gets flushed.
        .chain(Some(('\0', true)))
        .filter_map(move |(c, done)| {
            if done {
                return prev_range.clone();
            }
            if ruled_out_characters.contains(&(c as u32)) {
                return None;
            }
            if let Some(range) = prev_range.clone() {
                // Walk the gap between the current range's end and `c`; if
                // every character in the gap is ruled out, extend the range
                // across it, otherwise emit the range and start a new one.
                let mut prev_range_successor = range.end as u32 + 1;
                while prev_range_successor < c as u32 {
                    if !ruled_out_characters.contains(&prev_range_successor) {
                        prev_range = Some(c..c);
                        return Some(range);
                    }
                    prev_range_successor += 1;
                }
                prev_range = Some(range.start..c);
                None
            } else {
                prev_range = Some(c..c);
                None
            }
        })
}
|
|
|
|
|
|
2019-06-19 21:36:01 -07:00
|
|
|
// Emit the C statement that moves the lexer to the next state: ADVANCE for
// characters inside the token being recognized, SKIP for leading characters
// (e.g. whitespace) outside it.
// NOTE(review): ADVANCE gets a trailing semicolon here but SKIP does not —
// presumably the SKIP macro's expansion supplies its own; confirm against
// the generated parser.h macros before "fixing" this asymmetry.
fn add_advance_action(&mut self, action: &AdvanceAction) {
    if action.in_main_token {
        add!(self, "ADVANCE({});", action.state);
    } else {
        add!(self, "SKIP({})", action.state);
    }
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
// Emit `ts_lex_modes`: for each parse state, which lex state (and external
// lex state, if any) to use. A non-terminal-extra state whose only valid
// token is end-of-input gets the sentinel lex state -1.
fn add_lex_modes_list(&mut self) {
    add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{");
    indent!(self);
    for (i, state) in self.parse_table.states.iter().enumerate() {
        if state.is_non_terminal_extra
            && state.terminal_entries.len() == 1
            && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end()
        {
            add_line!(self, "[{}] = {{-1}},", i,);
        } else if state.external_lex_state_id > 0 {
            add_line!(
                self,
                "[{}] = {{.lex_state = {}, .external_lex_state = {}}},",
                i,
                state.lex_state_id,
                state.external_lex_state_id
            );
        } else {
            add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id);
        }
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
// Emit the C enum assigning an index to each external-scanner token.
// (Index-based loop is deliberate: iterating `external_tokens` by reference
// would hold an immutable borrow of `self` across the mutating `add_line!`.)
fn add_external_token_enum(&mut self) {
    add_line!(self, "enum {{");
    indent!(self);
    for i in 0..self.syntax_grammar.external_tokens.len() {
        add_line!(
            self,
            "{} = {},",
            self.external_token_id(&self.syntax_grammar.external_tokens[i]),
            i
        );
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
fn add_external_scanner_symbol_map(&mut self) {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
|
|
|
|
|
);
|
|
|
|
|
indent!(self);
|
|
|
|
|
for i in 0..self.syntax_grammar.external_tokens.len() {
|
2019-01-04 15:27:15 -08:00
|
|
|
let token = &self.syntax_grammar.external_tokens[i];
|
2019-01-17 17:16:04 -08:00
|
|
|
let id_token = token
|
|
|
|
|
.corresponding_internal_token
|
|
|
|
|
.unwrap_or(Symbol::external(i));
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"[{}] = {},",
|
2019-01-04 15:27:15 -08:00
|
|
|
self.external_token_id(&token),
|
|
|
|
|
self.symbol_ids[&id_token],
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn add_external_scanner_states_list(&mut self) {
|
|
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"static bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
|
2019-05-16 16:27:05 -07:00
|
|
|
self.parse_table.external_lex_states.len(),
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
indent!(self);
|
2019-05-16 16:27:05 -07:00
|
|
|
for i in 0..self.parse_table.external_lex_states.len() {
|
|
|
|
|
if !self.parse_table.external_lex_states[i].is_empty() {
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(self, "[{}] = {{", i);
|
|
|
|
|
indent!(self);
|
2019-05-16 16:27:05 -07:00
|
|
|
for token in self.parse_table.external_lex_states[i].iter() {
|
2018-12-23 10:16:03 -08:00
|
|
|
add_line!(
|
|
|
|
|
self,
|
|
|
|
|
"[{}] = true,",
|
2019-08-29 15:26:05 -07:00
|
|
|
self.external_token_id(&self.syntax_grammar.external_tokens[token.index])
|
2018-12-23 10:16:03 -08:00
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}},");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
dedent!(self);
|
|
|
|
|
add_line!(self, "}};");
|
|
|
|
|
add_line!(self, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Emits the parse tables.
///
/// Two encodings are produced:
/// * `ts_parse_table` — a dense 2-D array indexed by [state][symbol], used
///   for the first `large_state_count` states (or all states when not
///   targeting the new ABI).
/// * `ts_small_parse_table` / `ts_small_parse_table_map` — a compact
///   run-length encoding for the remaining "small" states, grouping
///   lookahead symbols that share the same action.
///
/// Side effect: interns every distinct action list into
/// `parse_table_entries`, which is finally handed to
/// `add_parse_action_list` to emit `ts_parse_actions`.
fn add_parse_table(&mut self) {
    let mut parse_table_entries = Vec::new();
    let mut next_parse_action_list_index = 0;

    // Reserve index 0 for the empty entry, so 0 can act as the
    // "no action" value in the emitted tables.
    self.get_parse_action_list_id(
        &ParseTableEntry {
            actions: Vec::new(),
            reusable: false,
        },
        &mut parse_table_entries,
        &mut next_parse_action_list_index,
    );

    add_line!(
        self,
        "static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{",
        if self.next_abi {
            "LARGE_STATE_COUNT"
        } else {
            "STATE_COUNT"
        }
    );
    indent!(self);

    // Scratch buffers reused across states to avoid per-state allocation.
    let mut terminal_entries = Vec::new();
    let mut nonterminal_entries = Vec::new();

    for (i, state) in self
        .parse_table
        .states
        .iter()
        .enumerate()
        .take(self.large_state_count)
    {
        add_line!(self, "[{}] = {{", i);
        indent!(self);

        // Ensure the entries are in a deterministic order, since they are
        // internally represented as a hash map.
        terminal_entries.clear();
        nonterminal_entries.clear();
        terminal_entries.extend(state.terminal_entries.iter());
        nonterminal_entries.extend(state.nonterminal_entries.iter());
        terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
        nonterminal_entries.sort_unstable_by_key(|k| k.0);

        // GOTO entries: ShiftExtra loops back to the current state.
        for (symbol, action) in &nonterminal_entries {
            add_line!(
                self,
                "[{}] = STATE({}),",
                self.symbol_ids[symbol],
                match action {
                    GotoAction::Goto(state) => *state,
                    GotoAction::ShiftExtra => i,
                }
            );
        }

        // Terminal entries reference interned action lists by index.
        for (symbol, entry) in &terminal_entries {
            let entry_id = self.get_parse_action_list_id(
                entry,
                &mut parse_table_entries,
                &mut next_parse_action_list_index,
            );
            add_line!(
                self,
                "[{}] = ACTIONS({}),",
                self.symbol_ids[symbol],
                entry_id
            );
        }
        dedent!(self);
        add_line!(self, "}},");
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");

    if self.large_state_count < self.parse_table.states.len() {
        add_line!(self, "static uint16_t ts_small_parse_table[] = {{");
        indent!(self);

        // `index` tracks the flat offset of each small state within
        // `ts_small_parse_table`; offsets are recorded in
        // `small_state_indices` for the map emitted below.
        let mut index = 0;
        let mut small_state_indices = Vec::new();
        let mut symbols_by_value: HashMap<(usize, SymbolType), Vec<Symbol>> = HashMap::new();
        for state in self.parse_table.states.iter().skip(self.large_state_count) {
            small_state_indices.push(index);
            symbols_by_value.clear();

            terminal_entries.clear();
            terminal_entries.extend(state.terminal_entries.iter());
            terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));

            // In a given parse state, many lookahead symbols have the same actions.
            // So in the "small state" representation, group symbols by their action
            // in order to avoid repeating the action.
            for (symbol, entry) in &terminal_entries {
                let entry_id = self.get_parse_action_list_id(
                    entry,
                    &mut parse_table_entries,
                    &mut next_parse_action_list_index,
                );
                symbols_by_value
                    .entry((entry_id, SymbolType::Terminal))
                    .or_default()
                    .push(**symbol);
            }
            for (symbol, action) in &state.nonterminal_entries {
                let state_id = match action {
                    GotoAction::Goto(i) => *i,
                    GotoAction::ShiftExtra => {
                        // Current state's absolute id: large states plus the
                        // small states pushed so far (this one included).
                        self.large_state_count + small_state_indices.len() - 1
                    }
                };
                symbols_by_value
                    .entry((state_id, SymbolType::NonTerminal))
                    .or_default()
                    .push(*symbol);
            }

            // Deterministic order for the hash-map groups.
            let mut values_with_symbols = symbols_by_value.drain().collect::<Vec<_>>();
            values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| {
                (symbols.len(), *kind, *value, symbols[0])
            });

            // Row header: number of (action, symbol-group) pairs.
            add_line!(self, "[{}] = {},", index, values_with_symbols.len());
            indent!(self);

            for ((value, kind), symbols) in values_with_symbols.iter_mut() {
                if *kind == SymbolType::NonTerminal {
                    add_line!(self, "STATE({}), {},", value, symbols.len());
                } else {
                    add_line!(self, "ACTIONS({}), {},", value, symbols.len());
                }

                symbols.sort_unstable();
                indent!(self);
                for symbol in symbols {
                    add_line!(self, "{},", self.symbol_ids[symbol]);
                }
                dedent!(self);
            }

            dedent!(self);

            // Advance past this row: 1 header word plus, per group,
            // 2 words (action + count) and one word per symbol.
            index += 1 + values_with_symbols
                .iter()
                .map(|(_, symbols)| 2 + symbols.len())
                .sum::<usize>();
        }

        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");

        // Map from small-state id to its flat offset in the table above.
        add_line!(self, "static uint32_t ts_small_parse_table_map[] = {{");
        indent!(self);
        for i in self.large_state_count..self.parse_table.states.len() {
            add_line!(
                self,
                "[SMALL_STATE({})] = {},",
                i,
                small_state_indices[i - self.large_state_count]
            );
        }
        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");
    }

    self.add_parse_action_list(parse_table_entries);
}
|
|
|
|
|
|
|
|
|
|
/// Emits `ts_parse_actions`: for each interned entry, a header element
/// (count + reusable flag) followed by its action elements, all on one
/// generated line. `i` is the entry's starting index as assigned by
/// `get_parse_action_list_id`, so headers land at the indices the parse
/// tables reference via `ACTIONS(...)`.
fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
    add_line!(self, "static TSParseActionEntry ts_parse_actions[] = {{");
    indent!(self);
    for (i, entry) in parse_table_entries {
        add!(
            self,
            " [{}] = {{ .entry = {{.count = {}, .reusable = {}}} }},",
            i,
            entry.actions.len(),
            entry.reusable
        );
        for action in entry.actions {
            add!(self, " ");
            match action {
                ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
                ParseAction::Recover => add!(self, "RECOVER()"),
                ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
                ParseAction::Shift {
                    state,
                    is_repetition,
                } => {
                    if is_repetition {
                        add!(self, "SHIFT_REPEAT({})", state);
                    } else {
                        add!(self, "SHIFT({})", state);
                    }
                }
                ParseAction::Reduce {
                    symbol,
                    child_count,
                    dynamic_precedence,
                    production_id,
                    ..
                } => {
                    add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
                    // Optional designated-initializer fields are only
                    // emitted when non-zero, keeping the output compact.
                    if dynamic_precedence != 0 {
                        add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
                    }
                    if production_id != 0 {
                        add!(self, ", .production_id = {}", production_id);
                    }
                    add!(self, ")");
                }
            }
            add!(self, ",")
        }
        add!(self, "\n");
    }
    dedent!(self);
    add_line!(self, "}};");
    add_line!(self, "");
}
|
|
|
|
|
|
|
|
|
|
/// Emits the exported `tree_sitter_<language>()` function that returns the
/// static `TSLanguage` definition, plus forward declarations for the
/// external scanner's C entry points when the grammar uses one.
///
/// The whole export is wrapped in `extern "C" { ... }` under `#ifdef
/// __cplusplus`, so the generated parser can be compiled together with a
/// C++ scanner and still link with C symbol names.
fn add_parser_export(&mut self) {
    let language_function_name = format!("tree_sitter_{}", self.language_name);
    let external_scanner_name = format!("{}_external_scanner", language_function_name);

    add_line!(self, "#ifdef __cplusplus");
    add_line!(self, r#"extern "C" {{"#);
    add_line!(self, "#endif");

    // Forward-declare the user-provided external scanner functions.
    if !self.syntax_grammar.external_tokens.is_empty() {
        add_line!(self, "void *{}_create(void);", external_scanner_name);
        add_line!(self, "void {}_destroy(void *);", external_scanner_name);
        add_line!(
            self,
            "bool {}_scan(void *, TSLexer *, const bool *);",
            external_scanner_name
        );
        add_line!(
            self,
            "unsigned {}_serialize(void *, char *);",
            external_scanner_name
        );
        add_line!(
            self,
            "void {}_deserialize(void *, const char *, unsigned);",
            external_scanner_name
        );
        add_line!(self, "");
    }

    // NOTE(review): redefining the `extern` keyword itself to get
    // dllexport on Windows is unusual but deliberate here — it applies to
    // the single declaration below.
    add_line!(self, "#ifdef _WIN32");
    add_line!(self, "#define extern __declspec(dllexport)");
    add_line!(self, "#endif");
    add_line!(self, "");

    add_line!(
        self,
        "extern const TSLanguage *{}(void) {{",
        language_function_name
    );
    indent!(self);
    add_line!(self, "static TSLanguage language = {{");
    indent!(self);
    add_line!(self, ".version = LANGUAGE_VERSION,");
    add_line!(self, ".symbol_count = SYMBOL_COUNT,");
    add_line!(self, ".alias_count = ALIAS_COUNT,");
    add_line!(self, ".token_count = TOKEN_COUNT,");

    // Fields below guarded by `next_abi` only exist in the newer,
    // not-yet-stable language ABI (see STABLE_LANGUAGE_VERSION).
    if self.next_abi {
        add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
    }

    add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
    add_line!(
        self,
        ".parse_table = (const unsigned short *)ts_parse_table,"
    );

    // Small-table fields are only emitted when small states exist,
    // mirroring add_parse_table's output.
    if self.large_state_count < self.parse_table.states.len() {
        add_line!(
            self,
            ".small_parse_table = (const uint16_t *)ts_small_parse_table,"
        );
        add_line!(
            self,
            ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map,"
        );
    }

    add_line!(self, ".parse_actions = ts_parse_actions,");
    add_line!(self, ".lex_modes = ts_lex_modes,");
    add_line!(self, ".symbol_names = ts_symbol_names,");

    if self.next_abi {
        add_line!(self, ".public_symbol_map = ts_symbol_map,");
    }

    if !self.parse_table.production_infos.is_empty() {
        add_line!(
            self,
            ".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
        );
    }

    add_line!(self, ".field_count = FIELD_COUNT,");

    if !self.field_names.is_empty() {
        add_line!(self, ".field_names = ts_field_names,");
        add_line!(
            self,
            ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices,"
        );
        add_line!(
            self,
            ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries,"
        );
    }

    add_line!(
        self,
        ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
    );
    add_line!(self, ".lex_fn = ts_lex,");

    // Keyword extraction: only wired up when the grammar declared a
    // keyword capture token.
    if let Some(keyword_capture_token) = self.keyword_capture_token {
        add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
        add_line!(
            self,
            ".keyword_capture_token = {},",
            self.symbol_ids[&keyword_capture_token]
        );
    }

    add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");

    if !self.syntax_grammar.external_tokens.is_empty() {
        add_line!(self, ".external_scanner = {{");
        indent!(self);
        add_line!(self, "(const bool *)ts_external_scanner_states,");
        add_line!(self, "ts_external_scanner_symbol_map,");
        add_line!(self, "{}_create,", external_scanner_name);
        add_line!(self, "{}_destroy,", external_scanner_name);
        add_line!(self, "{}_scan,", external_scanner_name);
        add_line!(self, "{}_serialize,", external_scanner_name);
        add_line!(self, "{}_deserialize,", external_scanner_name);
        dedent!(self);
        add_line!(self, "}},");
    }
    dedent!(self);

    add_line!(self, "}};");
    add_line!(self, "return &language;");
    dedent!(self);
    add_line!(self, "}}");
    add_line!(self, "#ifdef __cplusplus");
    add_line!(self, "}}");
    add_line!(self, "#endif");
}
|
|
|
|
|
|
|
|
|
|
fn get_parse_action_list_id(
|
|
|
|
|
&self,
|
|
|
|
|
entry: &ParseTableEntry,
|
|
|
|
|
parse_table_entries: &mut Vec<(usize, ParseTableEntry)>,
|
|
|
|
|
next_parse_action_list_index: &mut usize,
|
|
|
|
|
) -> usize {
|
|
|
|
|
if let Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) {
|
|
|
|
|
return *index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result = *next_parse_action_list_index;
|
|
|
|
|
parse_table_entries.push((result, entry.clone()));
|
|
|
|
|
*next_parse_action_list_index += 1 + entry.actions.len();
|
|
|
|
|
result
|
|
|
|
|
}
|
|
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
fn get_field_map_id(
|
|
|
|
|
&self,
|
|
|
|
|
flat_field_map: &Vec<(String, FieldLocation)>,
|
|
|
|
|
flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
|
|
|
|
|
next_flat_field_map_index: &mut usize,
|
|
|
|
|
) -> usize {
|
|
|
|
|
if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
|
|
|
|
|
return *index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result = *next_flat_field_map_index;
|
|
|
|
|
flat_field_maps.push((result, flat_field_map.clone()));
|
|
|
|
|
*next_flat_field_map_index += flat_field_map.len();
|
|
|
|
|
result
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
/// Returns the C identifier used for an external token in the generated
/// parser (`ts_external_token_<sanitized name>`).
fn external_token_id(&self, token: &ExternalToken) -> String {
    let sanitized_name = self.sanitize_identifier(&token.name);
    format!("ts_external_token_{}", sanitized_name)
}
|
|
|
|
|
|
|
|
|
|
/// Assigns a unique C identifier to `symbol` and records it in
/// `self.symbol_ids`. `used_identifiers` tracks ids claimed so far so
/// that colliding sanitized names get a numeric suffix.
fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
    let mut id;
    if symbol == Symbol::end() {
        // The end-of-input symbol has a fixed, well-known identifier and
        // is never deduplicated.
        id = "ts_builtin_sym_end".to_string();
    } else {
        let (name, kind) = self.metadata_for_symbol(symbol);
        // Prefix encodes the variable kind: aux_sym_ / anon_sym_ / sym_.
        id = match kind {
            VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
            VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
            VariableType::Hidden | VariableType::Named => {
                format!("sym_{}", self.sanitize_identifier(name))
            }
        };

        // On collision, append a numeric suffix: first try "<id>2", then
        // "<id>3", and so on. Each iteration strips the previous suffix
        // (drain of the trailing `suffix.len()` chars) before appending
        // the next one, so suffixes never stack.
        let mut suffix_number = 1;
        let mut suffix = String::new();
        while used_identifiers.contains(&id) {
            id.drain(id.len() - suffix.len()..);
            suffix_number += 1;
            suffix = suffix_number.to_string();
            id += &suffix;
        }
    }

    used_identifiers.insert(id.clone());
    self.symbol_ids.insert(symbol, id);
}
|
|
|
|
|
|
2019-02-07 12:29:20 -08:00
|
|
|
fn field_id(&self, field_name: &String) -> String {
|
2019-02-08 16:06:29 -08:00
|
|
|
format!("field_{}", field_name)
|
2019-02-07 12:29:20 -08:00
|
|
|
}
|
|
|
|
|
|
2018-12-20 13:36:39 -08:00
|
|
|
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
|
|
|
|
|
match symbol.kind {
|
2019-01-04 11:19:53 -08:00
|
|
|
SymbolType::End => ("end", VariableType::Hidden),
|
2018-12-20 13:36:39 -08:00
|
|
|
SymbolType::NonTerminal => {
|
|
|
|
|
let variable = &self.syntax_grammar.variables[symbol.index];
|
|
|
|
|
(&variable.name, variable.kind)
|
|
|
|
|
}
|
|
|
|
|
SymbolType::Terminal => {
|
|
|
|
|
let variable = &self.lexical_grammar.variables[symbol.index];
|
|
|
|
|
(&variable.name, variable.kind)
|
|
|
|
|
}
|
|
|
|
|
SymbolType::External => {
|
|
|
|
|
let token = &self.syntax_grammar.external_tokens[symbol.index];
|
|
|
|
|
(&token.name, token.kind)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
/// Converts a grammar rule name into a valid C identifier fragment for use
/// in the generated symbol enum.
///
/// ASCII alphanumerics and `_` pass through unchanged. Known punctuation
/// and whitespace characters are spelled out by name (e.g. `*` -> `STAR`),
/// with a `_` separator inserted before a replacement when needed. Any
/// other character (e.g. non-ASCII letters) is silently dropped.
fn sanitize_identifier(&self, name: &str) -> String {
    let mut result = String::with_capacity(name.len());
    for c in name.chars() {
        if ('a' <= c && c <= 'z')
            || ('A' <= c && c <= 'Z')
            || ('0' <= c && c <= '9')
            || c == '_'
        {
            result.push(c);
        } else {
            // Replacement table for punctuation and control characters.
            let replacement = match c {
                '~' => "TILDE",
                '`' => "BQUOTE",
                '!' => "BANG",
                '@' => "AT",
                '#' => "POUND",
                '$' => "DOLLAR",
                '%' => "PERCENT",
                '^' => "CARET",
                '&' => "AMP",
                '*' => "STAR",
                '(' => "LPAREN",
                ')' => "RPAREN",
                '-' => "DASH",
                '+' => "PLUS",
                '=' => "EQ",
                '{' => "LBRACE",
                '}' => "RBRACE",
                '[' => "LBRACK",
                ']' => "RBRACK",
                '\\' => "BSLASH",
                '|' => "PIPE",
                ':' => "COLON",
                ';' => "SEMI",
                '"' => "DQUOTE",
                '\'' => "SQUOTE",
                '<' => "LT",
                '>' => "GT",
                ',' => "COMMA",
                '.' => "DOT",
                '?' => "QMARK",
                '/' => "SLASH",
                '\n' => "LF",
                '\r' => "CR",
                '\t' => "TAB",
                // Characters with no named replacement are omitted
                // from the identifier entirely.
                _ => continue,
            };
            // Separate a replacement from preceding text with an
            // underscore, unless one is already there.
            if !result.is_empty() && !result.ends_with("_") {
                result.push('_');
            }
            result += replacement;
        }
    }
    result
}
|
|
|
|
|
|
2018-12-23 10:16:03 -08:00
|
|
|
fn sanitize_string(&self, name: &str) -> String {
|
|
|
|
|
let mut result = String::with_capacity(name.len());
|
|
|
|
|
for c in name.chars() {
|
2019-01-11 17:43:27 -08:00
|
|
|
match c {
|
|
|
|
|
'\"' => result += "\\\"",
|
|
|
|
|
'\\' => result += "\\\\",
|
2019-11-13 10:54:34 -08:00
|
|
|
'\u{000c}' => result += "\\f",
|
2019-01-12 21:42:31 -08:00
|
|
|
'\n' => result += "\\n",
|
|
|
|
|
'\r' => result += "\\r",
|
2019-11-13 10:54:34 -08:00
|
|
|
'\t' => result += "\\t",
|
2019-01-11 17:43:27 -08:00
|
|
|
_ => result.push(c),
|
2018-12-23 10:16:03 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
result
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
2019-01-02 12:34:40 -08:00
|
|
|
|
|
|
|
|
fn add_character(&mut self, c: char) {
|
2019-11-13 10:54:34 -08:00
|
|
|
match c {
|
|
|
|
|
'\'' => add!(self, "'\\''"),
|
|
|
|
|
'\\' => add!(self, "'\\\\'"),
|
|
|
|
|
'\u{000c}' => add!(self, "'\\f'"),
|
|
|
|
|
'\n' => add!(self, "'\\n'"),
|
|
|
|
|
'\t' => add!(self, "'\\t'"),
|
|
|
|
|
'\r' => add!(self, "'\\r'"),
|
|
|
|
|
_ => {
|
|
|
|
|
if c == ' ' || c.is_ascii_graphic() {
|
|
|
|
|
add!(self, "'{}'", c)
|
|
|
|
|
} else {
|
|
|
|
|
add!(self, "{}", c as u32)
|
|
|
|
|
}
|
2019-01-02 12:34:40 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-12-20 13:36:39 -08:00
|
|
|
}
|
2018-12-05 12:50:12 -08:00
|
|
|
|
2019-08-28 17:14:04 -07:00
|
|
|
/// Returns a String of C code for the given components of a parser.
///
/// # Arguments
///
/// * `name` - A string slice containing the name of the language
/// * `parse_table` - The generated parse table for the language
/// * `main_lex_table` - The generated lexing table for the language
/// * `keyword_lex_table` - The generated keyword lexing table for the language
/// * `keyword_capture_token` - A symbol indicating which token is used
///    for keyword capture, if any.
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
/// * `simple_aliases` - A map describing the global rename rules that should apply.
///   the keys are symbols that are *always* aliased in the same way, and the values
///   are the aliases that are applied to those symbols.
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse
///   table format. This is mainly used for testing, when developing Tree-sitter itself.
pub(crate) fn render_c_code(
    name: &str,
    parse_table: ParseTable,
    main_lex_table: LexTable,
    keyword_lex_table: LexTable,
    keyword_capture_token: Option<Symbol>,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    simple_aliases: AliasMap,
    next_abi: bool,
) -> String {
    // All derived state (symbol ids, symbol order, aliases, field names,
    // large_state_count) starts empty/zero here; presumably it is filled
    // in by `generate()` before the tables are rendered — TODO confirm.
    Generator {
        buffer: String::new(),
        indent_level: 0,
        language_name: name.to_string(),
        large_state_count: 0,
        parse_table,
        main_lex_table,
        keyword_lex_table,
        keyword_capture_token,
        syntax_grammar,
        lexical_grammar,
        simple_aliases,
        symbol_ids: HashMap::new(),
        symbol_order: HashMap::new(),
        alias_ids: HashMap::new(),
        alias_map: BTreeMap::new(),
        field_names: Vec::new(),
        next_abi,
    }
    .generate()
}
|
2019-01-02 12:34:40 -08:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `Generator::get_ranges`: adjacent and
    /// ruled-out characters are merged into contiguous ranges.
    #[test]
    fn test_get_char_ranges() {
        struct Row {
            chars: Vec<char>,
            ruled_out_chars: Vec<char>,
            expected_ranges: Vec<Range<char>>,
        }

        let table = [
            Row {
                chars: vec!['a'],
                ruled_out_chars: vec![],
                expected_ranges: vec!['a'..'a'],
            },
            Row {
                chars: vec!['a', 'b', 'c', 'e', 'z'],
                ruled_out_chars: vec![],
                expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'],
            },
            Row {
                chars: vec!['a', 'b', 'c', 'e', 'h', 'z'],
                ruled_out_chars: vec!['d', 'f', 'g'],
                expected_ranges: vec!['a'..'h', 'z'..'z'],
            },
        ];

        for row in table.iter() {
            let ruled_out_chars = row
                .ruled_out_chars
                .iter()
                .map(|c| *c as u32)
                .collect();
            let ranges =
                Generator::get_ranges(&row.chars, &ruled_out_chars).collect::<Vec<_>>();
            assert_eq!(ranges, row.expected_ranges);
        }
    }
}
|