From 34a65f588d5e352d656df727bf6adafa2eca6894 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 21 Dec 2016 11:24:41 -0800 Subject: [PATCH] Tweak naming and organization of external-scanner related language fields --- include/tree_sitter/parser.h | 12 ++-- src/compiler/generate_code/c_code.cc | 87 ++++++++++++++-------------- src/runtime/language.h | 2 +- src/runtime/parser.c | 10 ++-- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 90247719..eea5f76f 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -52,7 +52,7 @@ typedef struct { typedef struct { uint16_t lex_state; - uint16_t external_tokens; + uint16_t external_lex_state; } TSLexMode; typedef union { @@ -74,15 +74,15 @@ typedef struct TSLanguage { const TSParseActionEntry *parse_actions; const TSLexMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); - const TSSymbol *external_token_symbol_map; - const bool *external_token_lists; struct { + const bool *states; + const TSSymbol *symbol_map; void *(*create)(); - bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + void (*destroy)(void *); void (*reset)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); bool (*serialize)(void *, TSExternalTokenState); void (*deserialize)(void *, TSExternalTokenState); - void (*destroy)(void *); } external_scanner; } TSLanguage; @@ -175,8 +175,6 @@ typedef struct TSLanguage { .symbol_names = ts_symbol_names, \ .lex_fn = ts_lex, \ .external_token_count = EXTERNAL_TOKEN_COUNT, \ - .external_token_lists = (const bool *)ts_external_token_lists, \ - .external_token_symbol_map = ts_external_token_symbol_map, \ .external_scanner = {__VA_ARGS__} \ }; \ return &language \ diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 7c3601a3..d5eab8b0 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -75,7 +75,7 @@ class CCodeGenerator { const LexicalGrammar lexical_grammar; map sanitized_names; vector> parse_table_entries; - vector> external_token_id_sets; + vector> external_scanner_states; size_t next_parse_action_list_index; public: @@ -102,11 +102,12 @@ class CCodeGenerator { add_lex_function(); add_lex_modes_list(); - if (!syntax_grammar.external_tokens.empty()) + if (!syntax_grammar.external_tokens.empty()) { add_external_token_enum(); + add_external_scanner_symbol_map(); + add_external_scanner_states_list(); + } - add_external_token_symbol_map(); - add_external_scan_modes_list(); add_parse_table(); add_parser_export(); @@ -258,7 +259,7 @@ class CCodeGenerator { } if (needs_external_scanner) { - add(", .external_tokens = " + add_external_scanner_state(external_token_indices)); + add(", .external_lex_state = " + add_external_scanner_state(external_token_indices)); } add("},"); @@ -269,11 +270,11 @@ class CCodeGenerator { } string add_external_scanner_state(set external_token_ids) { - for (size_t i = 0, n = external_token_id_sets.size(); i < n; i++) - if (external_token_id_sets[i] == external_token_ids) + for (size_t i = 0, n = external_scanner_states.size(); i < n; i++) + if (external_scanner_states[i] == external_token_ids) return to_string(i); - external_token_id_sets.push_back(external_token_ids); - return to_string(external_token_id_sets.size() - 1); + external_scanner_states.push_back(external_token_ids); + return to_string(external_scanner_states.size() - 1); } void add_external_token_enum() { @@ -286,8 +287,8 @@ class CCodeGenerator { line(); } - void add_external_token_symbol_map() { - line("TSSymbol ts_external_token_symbol_map[EXTERNAL_TOKEN_COUNT] = {"); + void add_external_scanner_symbol_map() { + line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {"); indent([&]() { for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) { line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ","); @@ -297,17 +298,17 @@ class CCodeGenerator { line(); } - void add_external_scan_modes_list() { - line("static bool ts_external_token_lists["); - add(to_string(external_token_id_sets.size())); + void add_external_scanner_states_list() { + line("static bool ts_external_scanner_states["); + add(to_string(external_scanner_states.size())); add("][EXTERNAL_TOKEN_COUNT] = {"); indent([&]() { size_t i = 0; - for (const auto &external_token_ids : external_token_id_sets) { - if (!external_token_ids.empty()) { + for (const auto &valid_external_lookaheads : external_scanner_states) { + if (!valid_external_lookaheads.empty()) { line("[" + to_string(i) + "] = {"); indent([&]() { - for (Symbol::Index id : external_token_ids) { + for (Symbol::Index id : valid_external_lookaheads) { line("[" + external_token_id(id) + "] = true,"); } }); @@ -352,40 +353,38 @@ class CCodeGenerator { } void add_parser_export() { - if (!syntax_grammar.external_tokens.empty()) { - string external_scanner_name = "ts_language_" + name + "_external_scanner"; + string external_scanner_name = "ts_language_" + name + "_external_scanner"; + if (!syntax_grammar.external_tokens.empty()) { line("void *" + external_scanner_name + "_create();"); - line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);"); + line("void " + external_scanner_name + "_destroy();"); line("void " + external_scanner_name + "_reset(void *);"); + line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);"); line("bool " + external_scanner_name + "_serialize(void *, TSExternalTokenState);"); line("void " + external_scanner_name + "_deserialize(void *, TSExternalTokenState);"); - line("void " + external_scanner_name + "_destroy();"); line(); - - line("const TSLanguage *ts_language_" + name + "() {"); - indent([&]() { - if (!syntax_grammar.external_tokens.empty()) { - line("GET_LANGUAGE("); - indent([&]() { - line(external_scanner_name + "_create,"); - line(external_scanner_name + "_scan,"); - line(external_scanner_name + "_reset,"); - line(external_scanner_name + "_serialize,"); - line(external_scanner_name + "_deserialize,"); - line(external_scanner_name + "_destroy,"); - }); - line(");"); - } - }); - line("}"); - } else { - line("const TSLanguage *ts_language_" + name + "() {"); - indent([&]() { - line("GET_LANGUAGE();"); - }); - line("}"); } + + line("const TSLanguage *ts_language_" + name + "() {"); + indent([&]() { + line("GET_LANGUAGE("); + if (syntax_grammar.external_tokens.empty()) { + add(");"); + } else { + indent([&]() { + line("(const bool *)ts_external_scanner_states,"); + line("ts_external_scanner_symbol_map,"); + line(external_scanner_name + "_create,"); + line(external_scanner_name + "_destroy,"); + line(external_scanner_name + "_reset,"); + line(external_scanner_name + "_scan,"); + line(external_scanner_name + "_serialize,"); + line(external_scanner_name + "_deserialize,"); + }); + line(");"); + } + }); + line("}"); line(); } diff --git a/src/runtime/language.h b/src/runtime/language.h index 56e275bd..20e6ec5d 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -59,7 +59,7 @@ ts_language_enabled_external_tokens(const TSLanguage *self, if (external_scanner_state == 0) { return NULL; } else { - return self->external_token_lists + self->external_token_count * external_scanner_state; + return self->external_scanner.states + self->external_token_count * external_scanner_state; } } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 6787e1ac..f5b08f82 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -164,7 +164,7 @@ static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree, if (ts_language_is_symbol_external(self->language, tree->first_leaf.symbol)) return false; if (tree->size.bytes == 0) return false; if (tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state && - tree->first_leaf.lex_mode.external_tokens == current_lex_mode.external_tokens) + tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state) return true; if (!table_entry->is_reusable) return false; @@ -249,7 +249,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { TSLexMode lex_mode = self->language->lex_modes[parse_state]; const bool *external_tokens = ts_language_enabled_external_tokens( self->language, - lex_mode.external_tokens + lex_mode.external_lex_state ); bool found_external_token = false; @@ -263,7 +263,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { Length current_position = self->lexer.current_position; if (external_tokens) { - LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_tokens, + LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, current_position.extent.row, current_position.extent.column); parser__restore_external_scanner(self, version); ts_lexer_start(&self->lexer); @@ -288,7 +288,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { lex_mode = self->language->lex_modes[ERROR_STATE]; external_tokens = ts_language_enabled_external_tokens( self->language, - lex_mode.external_tokens + lex_mode.external_lex_state ); ts_lexer_reset(&self->lexer, start_position); continue; @@ -320,7 +320,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { result = ts_tree_make_error(size, padding, first_error_character); } else { TSSymbol symbol = self->lexer.data.result_symbol; - if (found_external_token) symbol = self->language->external_token_symbol_map[symbol]; + if (found_external_token) symbol = self->language->external_scanner.symbol_map[symbol]; Length padding = length_sub(self->lexer.token_start_position, start_position); Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);