From e0c24e3be6903e4cba0a4ab412b3ec4c92b7dc19 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Mar 2016 09:55:25 -0800 Subject: [PATCH] Remove old error recovery code --- include/tree_sitter/parser.h | 15 +++++----- include/tree_sitter/runtime.h | 6 ++-- src/compiler/build_tables/build_lex_table.cc | 4 +-- .../build_tables/build_parse_table.cc | 6 +--- src/compiler/generate_code/c_code.cc | 7 ----- src/compiler/parse_grammar.cc | 2 +- src/compiler/rules.h | 1 - src/compiler/rules/built_in_symbols.cc | 8 ++--- src/compiler/rules/built_in_symbols.h | 1 - src/compiler/rules/rules.cc | 4 --- src/runtime/language.c | 29 +++++++++++++++---- src/runtime/language.h | 2 ++ src/runtime/node.c | 18 +++++++----- src/runtime/parser.c | 26 +++++++---------- src/runtime/stack.c | 24 ++++++++++----- 15 files changed, 78 insertions(+), 75 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 7d4ad028..e7323322 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -131,13 +131,14 @@ struct TSLanguage { return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \ ts_symbol_names[symbol], false); -#define LEX_ERROR() \ - if (error_mode) { \ - if (state == ts_lex_state_error) \ - lexer->advance_fn(lexer, state); \ - GO_TO_STATE(ts_lex_state_error) \ - } else { \ - ACCEPT_TOKEN(ts_builtin_sym_error) \ +#define LEX_ERROR() \ + if (error_mode) { \ + if (state == ts_lex_state_error) \ + lexer->advance_fn(lexer, state); \ + GO_TO_STATE(ts_lex_state_error) \ + } else { \ + return lexer->accept_fn(lexer, ts_builtin_sym_error, (TSSymbolMetadata){}, \ + "ERROR", false); \ } /* diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index e742ace1..d57e60d9 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -101,9 +101,9 @@ size_t ts_document_parse_count(const TSDocument *); size_t ts_language_symbol_count(const TSLanguage *); const char *ts_language_symbol_name(const TSLanguage *, TSSymbol); -#define ts_builtin_sym_error 0 -#define ts_builtin_sym_end 1 -#define ts_builtin_sym_start 2 +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define ts_builtin_sym_start 1 #ifdef __cplusplus } diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 50c0caff..cea7fc44 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -65,9 +65,7 @@ class LexTableBuilder { LexItemSet result; for (const Symbol &symbol : symbols) { vector rules; - if (symbol == rules::ERROR()) { - continue; - } else if (symbol == rules::END_OF_INPUT()) { + if (symbol == rules::END_OF_INPUT()) { rules.push_back(CharacterSet().include(0).copy()); } else if (symbol.is_token) { rule_ptr rule = lex_grammar.variables[symbol.index].rule; diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 634a5afd..a33e7515 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -80,8 +80,6 @@ class ParseTableBuilder { mark_fragile_actions(); remove_duplicate_parse_states(); - parse_table.symbols.insert({ rules::ERROR(), { true } }); - return { parse_table, CompileError::none() }; } @@ -400,9 +398,7 @@ class ParseTableBuilder { string symbol_name(const rules::Symbol &symbol) const { if (symbol.is_built_in()) { - if (symbol == rules::ERROR()) - return "ERROR"; - else if (symbol == rules::END_OF_INPUT()) + if (symbol == rules::END_OF_INPUT()) return "END_OF_INPUT"; else return ""; diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 3819e0b2..99a933b9 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -23,7 +23,6 @@ using std::to_string; using std::vector; using util::escape_char; -static Variable ERROR_ENTRY("error", VariableTypeNamed, rule_ptr()); static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr()); static const map REPLACEMENTS({ @@ -465,8 +464,6 @@ class CCodeGenerator { // Helper functions string symbol_id(const rules::Symbol &symbol) { - if (symbol == rules::ERROR()) - return "ts_builtin_sym_error"; if (symbol == rules::END_OF_INPUT()) return "ts_builtin_sym_end"; @@ -484,16 +481,12 @@ class CCodeGenerator { } string symbol_name(const rules::Symbol &symbol) { - if (symbol == rules::ERROR()) - return "ERROR"; if (symbol == rules::END_OF_INPUT()) return "END"; return entry_for_symbol(symbol).first; } VariableType symbol_type(const rules::Symbol &symbol) { - if (symbol == rules::ERROR()) - return VariableTypeNamed; if (symbol == rules::END_OF_INPUT()) return VariableTypeHidden; return entry_for_symbol(symbol).second; diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc index 17bfe6cf..5c1c3200 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_grammar.cc @@ -90,7 +90,7 @@ ParseRuleResult parse_rule(json_value *rule_json) { json_value content_json = rule_json->operator[]("content"); ParseRuleResult content = parse_rule(&content_json); if (content.rule.get()) { - return { err(content.rule), "" }; + return { content.rule, "" }; } else { error_message = "Invalid error content: " + content.error_message; goto error; diff --git a/src/compiler/rules.h b/src/compiler/rules.h index eb6f0fad..d98a719a 100644 --- a/src/compiler/rules.h +++ b/src/compiler/rules.h @@ -16,7 +16,6 @@ rule_ptr seq(const std::vector &); rule_ptr sym(const std::string &); rule_ptr pattern(const std::string &); rule_ptr str(const std::string &); -rule_ptr err(const rule_ptr &); rule_ptr prec(int precedence, const rule_ptr &); rule_ptr prec_left(const rule_ptr &); rule_ptr prec_left(int precedence, const rule_ptr &); diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index 5bc210d7..a7a877ec 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -7,16 +7,12 @@ Symbol END_OF_INPUT() { return Symbol(-1, true); } -Symbol ERROR() { - return Symbol(-2, true); -} - Symbol START() { - return Symbol(-3); + return Symbol(-2); } Symbol NONE() { - return Symbol(-4); + return Symbol(-3); } } // namespace rules diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h index 9bb83c31..bdaed01c 100644 --- a/src/compiler/rules/built_in_symbols.h +++ b/src/compiler/rules/built_in_symbols.h @@ -6,7 +6,6 @@ namespace tree_sitter { namespace rules { -Symbol ERROR(); Symbol END_OF_INPUT(); Symbol START(); Symbol NONE(); diff --git a/src/compiler/rules/rules.cc b/src/compiler/rules/rules.cc index fd354dab..b61358e8 100644 --- a/src/compiler/rules/rules.cc +++ b/src/compiler/rules/rules.cc @@ -59,10 +59,6 @@ rule_ptr str(const string &value) { return make_shared(value); } -rule_ptr err(const rule_ptr &rule) { - return choice({ rule, rules::ERROR().copy() }); -} - rule_ptr prec_left(const rule_ptr &rule) { return metadata(rule, { { rules::ASSOCIATIVITY, rules::AssociativityLeft } }); } diff --git a/src/runtime/language.c b/src/runtime/language.c index 02d3c5ab..91150182 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -1,14 +1,16 @@ #include "tree_sitter/parser.h" const TSParseAction *ts_language_actions(const TSLanguage *language, - TSStateId state, TSSymbol sym, + TSStateId state, TSSymbol symbol, size_t *count) { - if (state == ts_parse_state_error) { - state = language->out_of_context_states[sym]; + size_t action_index = 0; + if (symbol != ts_builtin_sym_error) { + if (state == ts_parse_state_error) + state = language->out_of_context_states[symbol]; + action_index = + (language->parse_table + (state * language->symbol_count))[symbol]; } - unsigned action_index = - (language->parse_table + (state * language->symbol_count))[sym]; *count = language->parse_actions[action_index].count; const TSParseActionEntry *entry = language->parse_actions + action_index + 1; return (const TSParseAction *)entry; @@ -26,8 +28,23 @@ size_t ts_language_symbol_count(const TSLanguage *language) { return language->symbol_count; } +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) { + if (symbol == ts_builtin_sym_error) + return (TSSymbolMetadata){ + .visible = true, + .named = true, + .extra = false, + .structural = true, + }; + else + return language->symbol_metadata[symbol]; +} + const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) { - return language->symbol_names[symbol]; + if (symbol == ts_builtin_sym_error) + return "ERROR"; + else + return language->symbol_names[symbol]; } bool ts_language_symbol_is_in_progress(const TSLanguage *self, TSStateId state, diff --git a/src/runtime/language.h b/src/runtime/language.h index 014e6780..9a0f1825 100644 --- a/src/runtime/language.h +++ b/src/runtime/language.h @@ -13,6 +13,8 @@ const TSParseAction *ts_language_actions(const TSLanguage *, TSStateId, TSSymbol, size_t *); TSParseAction ts_language_last_action(const TSLanguage *, TSStateId, TSSymbol); +TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol); + #ifdef __cplusplus } #endif diff --git a/src/runtime/node.c b/src/runtime/node.c index cfe37580..103117ab 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -225,7 +225,8 @@ void ts_symbol_iterator_next(TSSymbolIterator *self) { } const char *ts_node_name(TSNode self, const TSDocument *document) { - return document->parser.language->symbol_names[ts_node__tree(self)->symbol]; + TSSymbol symbol = ts_node__tree(self)->symbol; + return ts_language_symbol_name(document->parser.language, symbol); } static size_t write_lookahead_to_string(char *string, size_t limit, @@ -239,7 +240,7 @@ static size_t write_lookahead_to_string(char *string, size_t limit, } static size_t ts_tree__write_to_string(const TSTree *self, - const char **symbol_names, char *string, + const TSLanguage *language, char *string, size_t limit, bool is_root, bool include_anonymous) { if (!self) @@ -258,14 +259,15 @@ static size_t ts_tree__write_to_string(const TSTree *self, cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += write_lookahead_to_string(*writer, limit, self->lookahead_char); } else { - cursor += snprintf(*writer, limit, "(%s", symbol_names[self->symbol]); + cursor += snprintf(*writer, limit, "(%s", + ts_language_symbol_name(language, self->symbol)); } } for (size_t i = 0; i < self->child_count; i++) { TSTree *child = self->children[i]; - cursor += ts_tree__write_to_string(child, symbol_names, *writer, limit, - false, include_anonymous); + cursor += ts_tree__write_to_string(child, language, *writer, limit, false, + include_anonymous); } if (visible) @@ -277,11 +279,11 @@ static size_t ts_tree__write_to_string(const TSTree *self, char *ts_node_string(TSNode self, const TSDocument *document) { static char SCRATCH[1]; const TSTree *tree = ts_node__tree(self); - const char **symbol_names = document->parser.language->symbol_names; + const TSLanguage *language = document->parser.language; size_t size = - ts_tree__write_to_string(tree, symbol_names, SCRATCH, 0, true, false) + 1; + ts_tree__write_to_string(tree, language, SCRATCH, 0, true, false) + 1; char *result = ts_malloc(size * sizeof(char)); - ts_tree__write_to_string(tree, symbol_names, result, size, true, false); + ts_tree__write_to_string(tree, language, result, size, true, false); return result; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index d658690e..e040106a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -30,14 +30,16 @@ fprintf(stderr, "\"\n}\n\n"); \ } -#define LOG_STACK() \ - if (self->print_debugging_graphs) { \ - fputs(ts_stack_dot_graph(self->stack, self->language->symbol_names), \ - stderr); \ - fputs("\n\n", stderr); \ +#define LOG_STACK() \ + if (self->print_debugging_graphs) { \ + char *graph_string = \ + ts_stack_dot_graph(self->stack, self->language->symbol_names); \ + fputs(graph_string, stderr); \ + fputs("\n\n", stderr); \ + ts_free(graph_string); \ } -#define SYM_NAME(sym) self->language->symbol_names[sym] +#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) #define BOOL_STRING(value) (value ? "true" : "false") @@ -258,9 +260,9 @@ static void ts_parser__remove_head(TSParser *self, int head) { } static int ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { - if (!left) + if (!left || left->symbol == ts_builtin_sym_error) return 1; - if (!right) + if (!right || right->symbol == ts_builtin_sym_error) return -1; TSParser *self = data; @@ -321,8 +323,7 @@ static ParseActionResult ts_parser__reduce(TSParser *self, int head, bool extra, bool fragile, bool count_extra) { array_clear(&self->reduce_parents); - const TSSymbolMetadata *all_metadata = self->language->symbol_metadata; - TSSymbolMetadata metadata = all_metadata[symbol]; + TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol); StackPopResultArray pop_results = ts_stack_pop(self->stack, head, child_count, count_extra); if (!pop_results.size) @@ -646,11 +647,6 @@ static ParseActionResult ts_parser__consume_lookahead(TSParser *self, int head, LookaheadState *lookahead_state = array_get(&self->lookahead_states, current_head); - // TODO: Remove this by making a separate symbol for errors returned from - // the lexer. - if (lookahead->symbol == ts_builtin_sym_error) - action.type = TSParseActionTypeError; - LOG_STACK(); switch (action.type) { diff --git a/src/runtime/stack.c b/src/runtime/stack.c index c07f7535..57c4c783 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -513,22 +513,30 @@ size_t ts_stack__write_dot_graph(Stack *self, char *string, size_t n, continue; all_paths_done = false; - cursor += - snprintf(*s, n, "node_%p [label=%d];\n", node, node->entry.state); + cursor += snprintf(*s, n, "node_%p [label=", node); + if (node->entry.state == ts_parse_state_error) + cursor += snprintf(*s, n, "\"?\""); + else + cursor += snprintf(*s, n, "%d", node->entry.state); + cursor += snprintf(*s, n, "];\n"); for (int j = 0; j < node->successor_count; j++) { StackLink successor = node->successors[j]; cursor += snprintf(*s, n, "node_%p -> node_%p [label=\"", node, successor.node); - const char *name = symbol_names[successor.tree->symbol]; - for (const char *c = name; *c; c++) { - if (*c == '\"' || *c == '\\') { - **s = '\\'; + if (successor.tree->symbol == ts_builtin_sym_error) { + cursor += snprintf(*s, n, "ERROR"); + } else { + const char *name = symbol_names[successor.tree->symbol]; + for (const char *c = name; *c; c++) { + if (*c == '\"' || *c == '\\') { + **s = '\\'; + cursor++; + } + **s = *c; cursor++; } - **s = *c; - cursor++; } cursor += snprintf(*s, n, "\"];\n");