Add IMMEDIATE_TOKEN rule type, for enforcing no preceding extras

This commit is contained in:
Max Brunsfeld 2018-08-01 14:00:57 -07:00
parent f1821bb04d
commit cb784975a4
13 changed files with 212 additions and 59 deletions

View file

@ -23,6 +23,7 @@ using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::unordered_set;
using std::vector;
using util::escape_char;
using rules::Symbol;
@ -76,7 +77,7 @@ class CCodeGenerator {
Symbol keyword_capture_token;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
map<Symbol, string> symbol_ids;
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<set<Symbol::Index>> external_scanner_states;
size_t next_parse_action_list_index;
@ -165,6 +166,24 @@ class CCodeGenerator {
}
}
unordered_set<string> symbol_id_values;
symbol_ids[rules::END_OF_INPUT()] = "ts_builtin_sym_end";
for (const Symbol &symbol : parse_table.symbols) {
if (!symbol.is_built_in()) {
assign_symbol_id(symbol, &symbol_id_values);
}
}
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
const ExternalToken &external_token = syntax_grammar.external_tokens[i];
if (external_token.corresponding_internal_token == rules::NONE()) {
assign_symbol_id(Symbol::external(i), &symbol_id_values);
} else {
symbol_ids[Symbol::external(i)] = symbol_ids[external_token.corresponding_internal_token];
}
}
line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
@ -175,6 +194,33 @@ class CCodeGenerator {
line();
}
void assign_symbol_id(const Symbol &symbol, unordered_set<string> *symbol_id_values) {
auto entry = entry_for_symbol(symbol);
string symbol_id;
switch (entry.second) {
case VariableTypeAuxiliary:
symbol_id = "aux_sym_" + sanitize_name(entry.first);
break;
case VariableTypeAnonymous:
symbol_id = "anon_sym_" + sanitize_name(entry.first);
break;
default:
symbol_id = "sym_" + sanitize_name(entry.first);
break;
}
unsigned suffix_number = 1;
string unique_symbol_id = symbol_id;
while (symbol_id_values->count(unique_symbol_id)) {
suffix_number++;
unique_symbol_id = symbol_id + to_string(suffix_number);
}
symbol_id_values->insert(unique_symbol_id);
symbol_ids[symbol] = unique_symbol_id;
}
void add_symbol_enum() {
line("enum {");
indent([&]() {
@ -696,20 +742,7 @@ class CCodeGenerator {
}
string symbol_id(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return "ts_builtin_sym_end";
auto entry = entry_for_symbol(symbol);
string name = sanitize_name(entry.first);
switch (entry.second) {
case VariableTypeAuxiliary:
return "aux_sym_" + name;
case VariableTypeAnonymous:
return "anon_sym_" + name;
default:
return "sym_" + name;
}
return symbol_ids[symbol];
}
string alias_id(const Alias &alias) {
@ -776,47 +809,35 @@ class CCodeGenerator {
return name;
}
string sanitize_name(string name) {
auto existing = sanitized_names.find(name);
if (existing != sanitized_names.end())
return existing->second;
string stripped_name;
string sanitize_name(const string &name) {
string result;
for (char c : name) {
if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9') || (c == '_')) {
stripped_name += c;
result += c;
} else {
auto replacement = REPLACEMENTS.find(c);
size_t i = stripped_name.size();
size_t i = result.size();
if (replacement != REPLACEMENTS.end()) {
if (i > 0 && stripped_name[i - 1] != '_')
stripped_name += "_";
stripped_name += replacement->second;
if (i > 0 && result[i - 1] != '_')
result += "_";
result += replacement->second;
}
}
}
for (size_t extra_number = 0;; extra_number++) {
string suffix = extra_number ? to_string(extra_number) : "";
string unique_name = stripped_name + suffix;
if (unique_name == "")
continue;
if (!has_sanitized_name(unique_name)) {
sanitized_names.insert({ name, unique_name });
return unique_name;
}
}
return result;
}
string _boolean(bool value) {
return value ? "true" : "false";
}
bool has_sanitized_name(string name) {
for (const auto &pair : sanitized_names)
if (pair.second == name)
bool has_sanitized_name(const Symbol &symbol, string name) {
for (const auto &pair : symbol_ids) {
if (pair.second == name) {
return true;
}
}
return false;
}