Tweak naming and organization of external-scanner related language fields

This commit is contained in:
Max Brunsfeld 2016-12-21 11:24:41 -08:00
parent 42c41c158c
commit 34a65f588d
4 changed files with 54 additions and 57 deletions

View file

@ -52,7 +52,7 @@ typedef struct {
typedef struct {
uint16_t lex_state;
uint16_t external_tokens;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
@ -74,15 +74,15 @@ typedef struct TSLanguage {
const TSParseActionEntry *parse_actions;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
const TSSymbol *external_token_symbol_map;
const bool *external_token_lists;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)();
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
void (*destroy)(void *);
void (*reset)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
bool (*serialize)(void *, TSExternalTokenState);
void (*deserialize)(void *, TSExternalTokenState);
void (*destroy)(void *);
} external_scanner;
} TSLanguage;
@ -175,8 +175,6 @@ typedef struct TSLanguage {
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_token_lists = (const bool *)ts_external_token_lists, \
.external_token_symbol_map = ts_external_token_symbol_map, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language \

View file

@ -75,7 +75,7 @@ class CCodeGenerator {
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<set<Symbol::Index>> external_token_id_sets;
vector<set<Symbol::Index>> external_scanner_states;
size_t next_parse_action_list_index;
public:
@ -102,11 +102,12 @@ class CCodeGenerator {
add_lex_function();
add_lex_modes_list();
if (!syntax_grammar.external_tokens.empty())
if (!syntax_grammar.external_tokens.empty()) {
add_external_token_enum();
add_external_scanner_symbol_map();
add_external_scanner_states_list();
}
add_external_token_symbol_map();
add_external_scan_modes_list();
add_parse_table();
add_parser_export();
@ -258,7 +259,7 @@ class CCodeGenerator {
}
if (needs_external_scanner) {
add(", .external_tokens = " + add_external_scanner_state(external_token_indices));
add(", .external_lex_state = " + add_external_scanner_state(external_token_indices));
}
add("},");
@ -269,11 +270,11 @@ class CCodeGenerator {
}
string add_external_scanner_state(set<Symbol::Index> external_token_ids) {
for (size_t i = 0, n = external_token_id_sets.size(); i < n; i++)
if (external_token_id_sets[i] == external_token_ids)
for (size_t i = 0, n = external_scanner_states.size(); i < n; i++)
if (external_scanner_states[i] == external_token_ids)
return to_string(i);
external_token_id_sets.push_back(external_token_ids);
return to_string(external_token_id_sets.size() - 1);
external_scanner_states.push_back(external_token_ids);
return to_string(external_scanner_states.size() - 1);
}
void add_external_token_enum() {
@ -286,8 +287,8 @@ class CCodeGenerator {
line();
}
void add_external_token_symbol_map() {
line("TSSymbol ts_external_token_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
void add_external_scanner_symbol_map() {
line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
indent([&]() {
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
@ -297,17 +298,17 @@ class CCodeGenerator {
line();
}
void add_external_scan_modes_list() {
line("static bool ts_external_token_lists[");
add(to_string(external_token_id_sets.size()));
void add_external_scanner_states_list() {
line("static bool ts_external_scanner_states[");
add(to_string(external_scanner_states.size()));
add("][EXTERNAL_TOKEN_COUNT] = {");
indent([&]() {
size_t i = 0;
for (const auto &external_token_ids : external_token_id_sets) {
if (!external_token_ids.empty()) {
for (const auto &valid_external_lookaheads : external_scanner_states) {
if (!valid_external_lookaheads.empty()) {
line("[" + to_string(i) + "] = {");
indent([&]() {
for (Symbol::Index id : external_token_ids) {
for (Symbol::Index id : valid_external_lookaheads) {
line("[" + external_token_id(id) + "] = true,");
}
});
@ -352,40 +353,38 @@ class CCodeGenerator {
}
void add_parser_export() {
if (!syntax_grammar.external_tokens.empty()) {
string external_scanner_name = "ts_language_" + name + "_external_scanner";
string external_scanner_name = "ts_language_" + name + "_external_scanner";
if (!syntax_grammar.external_tokens.empty()) {
line("void *" + external_scanner_name + "_create();");
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
line("void " + external_scanner_name + "_destroy();");
line("void " + external_scanner_name + "_reset(void *);");
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
line("bool " + external_scanner_name + "_serialize(void *, TSExternalTokenState);");
line("void " + external_scanner_name + "_deserialize(void *, TSExternalTokenState);");
line("void " + external_scanner_name + "_destroy();");
line();
line("const TSLanguage *ts_language_" + name + "() {");
indent([&]() {
if (!syntax_grammar.external_tokens.empty()) {
line("GET_LANGUAGE(");
indent([&]() {
line(external_scanner_name + "_create,");
line(external_scanner_name + "_scan,");
line(external_scanner_name + "_reset,");
line(external_scanner_name + "_serialize,");
line(external_scanner_name + "_deserialize,");
line(external_scanner_name + "_destroy,");
});
line(");");
}
});
line("}");
} else {
line("const TSLanguage *ts_language_" + name + "() {");
indent([&]() {
line("GET_LANGUAGE();");
});
line("}");
}
line("const TSLanguage *ts_language_" + name + "() {");
indent([&]() {
line("GET_LANGUAGE(");
if (syntax_grammar.external_tokens.empty()) {
add(");");
} else {
indent([&]() {
line("(const bool *)ts_external_scanner_states,");
line("ts_external_scanner_symbol_map,");
line(external_scanner_name + "_create,");
line(external_scanner_name + "_destroy,");
line(external_scanner_name + "_reset,");
line(external_scanner_name + "_scan,");
line(external_scanner_name + "_serialize,");
line(external_scanner_name + "_deserialize,");
});
line(");");
}
});
line("}");
line();
}

View file

@ -59,7 +59,7 @@ ts_language_enabled_external_tokens(const TSLanguage *self,
if (external_scanner_state == 0) {
return NULL;
} else {
return self->external_token_lists + self->external_token_count * external_scanner_state;
return self->external_scanner.states + self->external_token_count * external_scanner_state;
}
}

View file

@ -164,7 +164,7 @@ static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
if (ts_language_is_symbol_external(self->language, tree->first_leaf.symbol)) return false;
if (tree->size.bytes == 0) return false;
if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
tree->first_leaf.lex_mode.external_tokens == current_lex_mode.external_tokens)
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state)
return true;
if (!table_entry->is_reusable)
return false;
@ -249,7 +249,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
TSLexMode lex_mode = self->language->lex_modes[parse_state];
const bool *external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_tokens
lex_mode.external_lex_state
);
bool found_external_token = false;
@ -263,7 +263,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
Length current_position = self->lexer.current_position;
if (external_tokens) {
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_tokens,
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state,
current_position.extent.row, current_position.extent.column);
parser__restore_external_scanner(self, version);
ts_lexer_start(&self->lexer);
@ -288,7 +288,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
lex_mode = self->language->lex_modes[ERROR_STATE];
external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_tokens
lex_mode.external_lex_state
);
ts_lexer_reset(&self->lexer, start_position);
continue;
@ -320,7 +320,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
result = ts_tree_make_error(size, padding, first_error_character);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
if (found_external_token) symbol = self->language->external_token_symbol_map[symbol];
if (found_external_token) symbol = self->language->external_scanner.symbol_map[symbol];
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);