Tweak naming and organization of external-scanner-related language fields
This commit is contained in:
parent
42c41c158c
commit
34a65f588d
4 changed files with 54 additions and 57 deletions
|
|
@ -52,7 +52,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
uint16_t lex_state;
|
||||
uint16_t external_tokens;
|
||||
uint16_t external_lex_state;
|
||||
} TSLexMode;
|
||||
|
||||
typedef union {
|
||||
|
|
@ -74,15 +74,15 @@ typedef struct TSLanguage {
|
|||
const TSParseActionEntry *parse_actions;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
const TSSymbol *external_token_symbol_map;
|
||||
const bool *external_token_lists;
|
||||
struct {
|
||||
const bool *states;
|
||||
const TSSymbol *symbol_map;
|
||||
void *(*create)();
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
void (*destroy)(void *);
|
||||
void (*reset)(void *);
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
bool (*serialize)(void *, TSExternalTokenState);
|
||||
void (*deserialize)(void *, TSExternalTokenState);
|
||||
void (*destroy)(void *);
|
||||
} external_scanner;
|
||||
} TSLanguage;
|
||||
|
||||
|
|
@ -175,8 +175,6 @@ typedef struct TSLanguage {
|
|||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
.external_token_count = EXTERNAL_TOKEN_COUNT, \
|
||||
.external_token_lists = (const bool *)ts_external_token_lists, \
|
||||
.external_token_symbol_map = ts_external_token_symbol_map, \
|
||||
.external_scanner = {__VA_ARGS__} \
|
||||
}; \
|
||||
return &language \
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class CCodeGenerator {
|
|||
const LexicalGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
|
||||
vector<set<Symbol::Index>> external_token_id_sets;
|
||||
vector<set<Symbol::Index>> external_scanner_states;
|
||||
size_t next_parse_action_list_index;
|
||||
|
||||
public:
|
||||
|
|
@ -102,11 +102,12 @@ class CCodeGenerator {
|
|||
add_lex_function();
|
||||
add_lex_modes_list();
|
||||
|
||||
if (!syntax_grammar.external_tokens.empty())
|
||||
if (!syntax_grammar.external_tokens.empty()) {
|
||||
add_external_token_enum();
|
||||
add_external_scanner_symbol_map();
|
||||
add_external_scanner_states_list();
|
||||
}
|
||||
|
||||
add_external_token_symbol_map();
|
||||
add_external_scan_modes_list();
|
||||
add_parse_table();
|
||||
add_parser_export();
|
||||
|
||||
|
|
@ -258,7 +259,7 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
if (needs_external_scanner) {
|
||||
add(", .external_tokens = " + add_external_scanner_state(external_token_indices));
|
||||
add(", .external_lex_state = " + add_external_scanner_state(external_token_indices));
|
||||
}
|
||||
|
||||
add("},");
|
||||
|
|
@ -269,11 +270,11 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
string add_external_scanner_state(set<Symbol::Index> external_token_ids) {
|
||||
for (size_t i = 0, n = external_token_id_sets.size(); i < n; i++)
|
||||
if (external_token_id_sets[i] == external_token_ids)
|
||||
for (size_t i = 0, n = external_scanner_states.size(); i < n; i++)
|
||||
if (external_scanner_states[i] == external_token_ids)
|
||||
return to_string(i);
|
||||
external_token_id_sets.push_back(external_token_ids);
|
||||
return to_string(external_token_id_sets.size() - 1);
|
||||
external_scanner_states.push_back(external_token_ids);
|
||||
return to_string(external_scanner_states.size() - 1);
|
||||
}
|
||||
|
||||
void add_external_token_enum() {
|
||||
|
|
@ -286,8 +287,8 @@ class CCodeGenerator {
|
|||
line();
|
||||
}
|
||||
|
||||
void add_external_token_symbol_map() {
|
||||
line("TSSymbol ts_external_token_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
|
||||
void add_external_scanner_symbol_map() {
|
||||
line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
|
||||
indent([&]() {
|
||||
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
|
||||
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
|
||||
|
|
@ -297,17 +298,17 @@ class CCodeGenerator {
|
|||
line();
|
||||
}
|
||||
|
||||
void add_external_scan_modes_list() {
|
||||
line("static bool ts_external_token_lists[");
|
||||
add(to_string(external_token_id_sets.size()));
|
||||
void add_external_scanner_states_list() {
|
||||
line("static bool ts_external_scanner_states[");
|
||||
add(to_string(external_scanner_states.size()));
|
||||
add("][EXTERNAL_TOKEN_COUNT] = {");
|
||||
indent([&]() {
|
||||
size_t i = 0;
|
||||
for (const auto &external_token_ids : external_token_id_sets) {
|
||||
if (!external_token_ids.empty()) {
|
||||
for (const auto &valid_external_lookaheads : external_scanner_states) {
|
||||
if (!valid_external_lookaheads.empty()) {
|
||||
line("[" + to_string(i) + "] = {");
|
||||
indent([&]() {
|
||||
for (Symbol::Index id : external_token_ids) {
|
||||
for (Symbol::Index id : valid_external_lookaheads) {
|
||||
line("[" + external_token_id(id) + "] = true,");
|
||||
}
|
||||
});
|
||||
|
|
@ -352,40 +353,38 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_parser_export() {
|
||||
if (!syntax_grammar.external_tokens.empty()) {
|
||||
string external_scanner_name = "ts_language_" + name + "_external_scanner";
|
||||
string external_scanner_name = "ts_language_" + name + "_external_scanner";
|
||||
|
||||
if (!syntax_grammar.external_tokens.empty()) {
|
||||
line("void *" + external_scanner_name + "_create();");
|
||||
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
|
||||
line("void " + external_scanner_name + "_destroy();");
|
||||
line("void " + external_scanner_name + "_reset(void *);");
|
||||
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
|
||||
line("bool " + external_scanner_name + "_serialize(void *, TSExternalTokenState);");
|
||||
line("void " + external_scanner_name + "_deserialize(void *, TSExternalTokenState);");
|
||||
line("void " + external_scanner_name + "_destroy();");
|
||||
line();
|
||||
|
||||
line("const TSLanguage *ts_language_" + name + "() {");
|
||||
indent([&]() {
|
||||
if (!syntax_grammar.external_tokens.empty()) {
|
||||
line("GET_LANGUAGE(");
|
||||
indent([&]() {
|
||||
line(external_scanner_name + "_create,");
|
||||
line(external_scanner_name + "_scan,");
|
||||
line(external_scanner_name + "_reset,");
|
||||
line(external_scanner_name + "_serialize,");
|
||||
line(external_scanner_name + "_deserialize,");
|
||||
line(external_scanner_name + "_destroy,");
|
||||
});
|
||||
line(");");
|
||||
}
|
||||
});
|
||||
line("}");
|
||||
} else {
|
||||
line("const TSLanguage *ts_language_" + name + "() {");
|
||||
indent([&]() {
|
||||
line("GET_LANGUAGE();");
|
||||
});
|
||||
line("}");
|
||||
}
|
||||
|
||||
line("const TSLanguage *ts_language_" + name + "() {");
|
||||
indent([&]() {
|
||||
line("GET_LANGUAGE(");
|
||||
if (syntax_grammar.external_tokens.empty()) {
|
||||
add(");");
|
||||
} else {
|
||||
indent([&]() {
|
||||
line("(const bool *)ts_external_scanner_states,");
|
||||
line("ts_external_scanner_symbol_map,");
|
||||
line(external_scanner_name + "_create,");
|
||||
line(external_scanner_name + "_destroy,");
|
||||
line(external_scanner_name + "_reset,");
|
||||
line(external_scanner_name + "_scan,");
|
||||
line(external_scanner_name + "_serialize,");
|
||||
line(external_scanner_name + "_deserialize,");
|
||||
});
|
||||
line(");");
|
||||
}
|
||||
});
|
||||
line("}");
|
||||
line();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ ts_language_enabled_external_tokens(const TSLanguage *self,
|
|||
if (external_scanner_state == 0) {
|
||||
return NULL;
|
||||
} else {
|
||||
return self->external_token_lists + self->external_token_count * external_scanner_state;
|
||||
return self->external_scanner.states + self->external_token_count * external_scanner_state;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
|
|||
if (ts_language_is_symbol_external(self->language, tree->first_leaf.symbol)) return false;
|
||||
if (tree->size.bytes == 0) return false;
|
||||
if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
|
||||
tree->first_leaf.lex_mode.external_tokens == current_lex_mode.external_tokens)
|
||||
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state)
|
||||
return true;
|
||||
if (!table_entry->is_reusable)
|
||||
return false;
|
||||
|
|
@ -249,7 +249,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
TSLexMode lex_mode = self->language->lex_modes[parse_state];
|
||||
const bool *external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
lex_mode.external_tokens
|
||||
lex_mode.external_lex_state
|
||||
);
|
||||
|
||||
bool found_external_token = false;
|
||||
|
|
@ -263,7 +263,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
Length current_position = self->lexer.current_position;
|
||||
|
||||
if (external_tokens) {
|
||||
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_tokens,
|
||||
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state,
|
||||
current_position.extent.row, current_position.extent.column);
|
||||
parser__restore_external_scanner(self, version);
|
||||
ts_lexer_start(&self->lexer);
|
||||
|
|
@ -288,7 +288,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
lex_mode = self->language->lex_modes[ERROR_STATE];
|
||||
external_tokens = ts_language_enabled_external_tokens(
|
||||
self->language,
|
||||
lex_mode.external_tokens
|
||||
lex_mode.external_lex_state
|
||||
);
|
||||
ts_lexer_reset(&self->lexer, start_position);
|
||||
continue;
|
||||
|
|
@ -320,7 +320,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
result = ts_tree_make_error(size, padding, first_error_character);
|
||||
} else {
|
||||
TSSymbol symbol = self->lexer.data.result_symbol;
|
||||
if (found_external_token) symbol = self->language->external_token_symbol_map[symbol];
|
||||
if (found_external_token) symbol = self->language->external_scanner.symbol_map[symbol];
|
||||
|
||||
Length padding = length_sub(self->lexer.token_start_position, start_position);
|
||||
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue