diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index e4b27d12..d9a8e197 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -139,12 +139,9 @@ typedef struct TSLanguage { } \ } -#define RECOVER(state_value) \ - { \ - { \ - .type = TSParseActionTypeRecover, \ - .params = {.state = state_value} \ - } \ +#define RECOVER() \ + { \ + { .type = TSParseActionTypeRecover } \ } #define SHIFT_EXTRA() \ diff --git a/project.gyp b/project.gyp index bbb88438..7ae20e54 100644 --- a/project.gyp +++ b/project.gyp @@ -11,7 +11,6 @@ 'externals/json-parser', ], 'sources': [ - 'src/compiler/build_tables/build_tables.cc', 'src/compiler/build_tables/lex_item.cc', 'src/compiler/build_tables/lex_item_transitions.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', @@ -90,7 +89,6 @@ ], 'sources': [ 'src/runtime/document.c', - 'src/runtime/error_costs.c', 'src/runtime/get_changed_ranges.c', 'src/runtime/language.c', 'src/runtime/lexer.c', diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc deleted file mode 100644 index a15aede3..00000000 --- a/src/compiler/build_tables/build_tables.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "compiler/build_tables/build_tables.h" -#include -#include "compiler/build_tables/lex_table_builder.h" -#include "compiler/build_tables/parse_table_builder.h" -#include "compiler/syntax_grammar.h" -#include "compiler/lexical_grammar.h" -#include "compiler/compile_error.h" - -namespace tree_sitter { -namespace build_tables { - -using std::tuple; -using std::make_tuple; - -tuple build_tables( - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar -) { - auto lex_table_builder = LexTableBuilder::create(lexical_grammar); - auto parse_table_builder = ParseTableBuilder::create( - syntax_grammar, - lexical_grammar, - lex_table_builder.get() - ); - - auto parse_table_result = parse_table_builder->build(); - ParseTable parse_table = parse_table_result.first; - const CompileError error = parse_table_result.second; - - LexTable lex_table = lex_table_builder->build(&parse_table); - return make_tuple(parse_table, lex_table, error); -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/build_tables/build_tables.h b/src/compiler/build_tables/build_tables.h deleted file mode 100644 index ed1f4770..00000000 --- a/src/compiler/build_tables/build_tables.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_BUILD_TABLES_H_ -#define COMPILER_BUILD_TABLES_BUILD_TABLES_H_ - -#include -#include "compiler/parse_table.h" -#include "compiler/lex_table.h" -#include "compiler/compile_error.h" - -namespace tree_sitter { - -struct SyntaxGrammar; -struct LexicalGrammar; - -namespace build_tables { - -std::tuple build_tables( - const SyntaxGrammar &, - const LexicalGrammar & -); - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_BUILD_TABLES_H_ diff --git a/src/compiler/build_tables/lex_table_builder.cc b/src/compiler/build_tables/lex_table_builder.cc index b50182f3..662f156c 100644 --- a/src/compiler/build_tables/lex_table_builder.cc +++ b/src/compiler/build_tables/lex_table_builder.cc @@ -9,6 +9,7 @@ #include #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/lex_item.h" +#include "compiler/build_tables/lookahead_set.h" #include "compiler/parse_table.h" #include "compiler/lexical_grammar.h" #include "compiler/rule.h" @@ -33,12 +34,13 @@ using rules::Symbol; using rules::Metadata; using rules::Seq; -class StartingCharacterAggregator { +template +class StartOrEndCharacterAggregator { public: void apply(const Rule &rule) { rule.match( [this](const Seq &sequence) { - apply(*sequence.left); + apply(is_start ? *sequence.left : *sequence.right); }, [this](const rules::Choice &rule) { @@ -47,20 +49,9 @@ class StartingCharacterAggregator { } }, - [this](const rules::Repeat &rule) { - apply(*rule.rule); - }, - - [this](const rules::Metadata &rule) { - apply(*rule.rule); - }, - - [this](const rules::CharacterSet &rule) { - result.add_set(rule); - }, - - [this](const rules::Blank) {}, - + [this](const rules::Repeat &rule) { apply(*rule.rule); }, + [this](const rules::Metadata &rule) { apply(*rule.rule); }, + [this](const rules::CharacterSet &rule) { result.add_set(rule); }, [](auto) {} ); } @@ -68,21 +59,37 @@ class StartingCharacterAggregator { CharacterSet result; }; +using StartingCharacterAggregator = StartOrEndCharacterAggregator; +using EndingCharacterAggregator = StartOrEndCharacterAggregator; + class LexTableBuilderImpl : public LexTableBuilder { LexTable lex_table; const LexicalGrammar grammar; vector separator_rules; LexConflictManager conflict_manager; unordered_map lex_state_ids; - - map following_characters_by_token_index; CharacterSet separator_start_characters; - CharacterSet current_conflict_detection_following_characters; - Symbol::Index current_conflict_detection_token_index; - bool current_conflict_value; + vector starting_characters_by_token; + vector following_characters_by_token; + vector> shadowed_tokens_by_token; + const vector &coincident_tokens_by_token; + vector conflict_status_by_token; + bool conflict_detection_mode; public: - LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) { + LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar, + const vector &following_tokens_by_token, + const vector &coincident_tokens) + : grammar(lexical_grammar), + starting_characters_by_token(lexical_grammar.variables.size()), + following_characters_by_token(lexical_grammar.variables.size()), + shadowed_tokens_by_token(lexical_grammar.variables.size()), + coincident_tokens_by_token(coincident_tokens), + conflict_detection_mode(false) { + + // Compute the possible separator rules and the set of separator characters that can occur + // immediately after any token. StartingCharacterAggregator separator_character_aggregator; for (const auto &rule : grammar.separators) { separator_rules.push_back(Repeat{rule}); @@ -90,75 +97,102 @@ class LexTableBuilderImpl : public LexTableBuilder { } separator_rules.push_back(Blank{}); separator_start_characters = separator_character_aggregator.result; - clear(); - } - LexTable build(ParseTable *parse_table) { - for (ParseState &parse_state : parse_table->states) { - parse_state.lex_state_id = add_lex_state( - item_set_for_terminals(parse_state.terminal_entries) - ); - } - mark_fragile_tokens(parse_table); - remove_duplicate_lex_states(parse_table); - return lex_table; - } + // Compute the set of characters that each token can start with and the set of non-separator + // characters that can follow each token. + for (unsigned i = 0, n = grammar.variables.size(); i < n; i++) { + StartingCharacterAggregator starting_character_aggregator; + starting_character_aggregator.apply(grammar.variables[i].rule); + starting_characters_by_token[i] = starting_character_aggregator.result; - bool detect_conflict(Symbol::Index left, Symbol::Index right, - const vector> &following_terminals_by_terminal_index) { - StartingCharacterAggregator left_starting_characters; - StartingCharacterAggregator right_starting_characters; - left_starting_characters.apply(grammar.variables[left].rule); - right_starting_characters.apply(grammar.variables[right].rule); - if (!left_starting_characters.result.intersects(right_starting_characters.result) && - !left_starting_characters.result.intersects(separator_start_characters) && - !right_starting_characters.result.intersects(separator_start_characters)) { - return false; - } - - auto following_characters_entry = following_characters_by_token_index.find(right); - if (following_characters_entry == following_characters_by_token_index.end()) { - StartingCharacterAggregator aggregator; - for (auto following_token_index : following_terminals_by_terminal_index[right]) { - aggregator.apply(grammar.variables[following_token_index].rule); - } - following_characters_entry = - following_characters_by_token_index.insert({right, aggregator.result}).first; + StartingCharacterAggregator following_character_aggregator; + following_tokens_by_token[i].for_each([&](Symbol following_token) { + following_character_aggregator.apply(grammar.variables[following_token.index].rule); + }); // TODO - Refactor this. In general, a keyword token cannot be followed immediately by // another alphanumeric character. But this requirement is currently not expressed anywhere in // the grammar. So without this hack, we would be overly conservative about merging parse // states because we would often consider `identifier` tokens to *conflict* with keyword // tokens. - if (is_keyword(grammar.variables[right])) { - following_characters_entry->second + if (is_keyword(grammar.variables[i])) { + following_character_aggregator.result .exclude('a', 'z') .exclude('A', 'Z') .exclude('0', '9') .exclude('_') .exclude('$'); } + + following_characters_by_token[i] = following_character_aggregator.result; } - current_conflict_detection_token_index = right; - current_conflict_detection_following_characters = following_characters_entry->second; - add_lex_state(item_set_for_terminals({{Symbol::terminal(left), {}}, {Symbol::terminal(right), {}}})); - bool result = current_conflict_value; + // For each pair of tokens, generate a lex table for just those two tokens and record what + // conflicts arise. + conflict_detection_mode = true; + for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) { + for (Symbol::Index j = 0; j < i; j++) { + if (starting_characters_by_token[i].intersects(starting_characters_by_token[j]) || + starting_characters_by_token[i].intersects(separator_start_characters) || + starting_characters_by_token[j].intersects(separator_start_characters)) { + clear(); + add_lex_state(item_set_for_terminals(LookaheadSet({ + Symbol::terminal(i), + Symbol::terminal(j) + }))); + if (conflict_status_by_token[i]) shadowed_tokens_by_token[j].insert(Symbol::terminal(i)); + if (conflict_status_by_token[j]) shadowed_tokens_by_token[i].insert(Symbol::terminal(j)); + } + } + } + } + + LexTable build(ParseTable *parse_table) { clear(); - return result; + conflict_detection_mode = false; + vector>> starting_token_sets; + + for (ParseState &parse_state : parse_table->states) { + LookaheadSet token_set; + for (auto &entry : parse_state.terminal_entries) { + token_set.insert(entry.first); + } + + bool did_merge = false; + for (auto &pair : starting_token_sets) { + if (merge_token_set(&pair.first, token_set)) { + did_merge = true; + pair.second.push_back(&parse_state); + break; + } + } + + if (!did_merge) starting_token_sets.push_back({token_set, {&parse_state}}); + } + + for (auto &pair : starting_token_sets) { + LexStateId state_id = add_lex_state(item_set_for_terminals(pair.first)); + for (ParseState *parse_state : pair.second) { + parse_state->lex_state_id = state_id; + } + } + mark_fragile_tokens(parse_table); + remove_duplicate_lex_states(parse_table); + return lex_table; } + const set &get_incompatible_tokens(Symbol::Index index) const { + return shadowed_tokens_by_token[index]; + } + + private: bool is_keyword(const LexicalVariable &variable) { - return variable.is_string && iswalpha(get_last_character(variable.rule)); - } - - static uint32_t get_last_character(const Rule &rule) { - return rule.match( - [](const Seq &sequence) { return get_last_character(*sequence.right); }, - [](const rules::CharacterSet &rule) { return *rule.included_chars.begin(); }, - [](const rules::Metadata &rule) { return get_last_character(*rule.rule); }, - [](auto) { return 0; } - ); + EndingCharacterAggregator aggregator; + aggregator.apply(variable.rule); + return + !aggregator.result.includes_all && + aggregator.result.included_chars.size() == 1 && + iswalpha(*aggregator.result.included_chars.begin()); } LexStateId add_lex_state(const LexItemSet &item_set) { @@ -178,11 +212,9 @@ class LexTableBuilderImpl : public LexTableBuilder { void clear() { lex_table.states.clear(); lex_state_ids.clear(); - current_conflict_detection_following_characters = CharacterSet(); - current_conflict_value = false; + conflict_status_by_token = vector(grammar.variables.size(), false); } - private: void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { for (const auto &pair : item_set.transitions()) { const CharacterSet &characters = pair.first; @@ -191,23 +223,28 @@ class LexTableBuilderImpl : public LexTableBuilder { AdvanceAction action(-1, transition.precedence, transition.in_main_token); AcceptTokenAction &accept_action = lex_table.states[state_id].accept_action; if (accept_action.is_present()) { - bool prefer_advancing = conflict_manager.resolve(transition.destination, action, accept_action); - bool can_advance_for_accepted_token = false; - for (const LexItem &item : transition.destination.entries) { - if (item.lhs == accept_action.symbol) { - can_advance_for_accepted_token = true; - } else if (item.lhs.index == current_conflict_detection_token_index && - !prefer_advancing && !transition.in_main_token) { - current_conflict_value = true; - } - } + bool prefer_advancing = conflict_manager.resolve( + transition.destination, + action, + accept_action + ); - if (accept_action.symbol.index == current_conflict_detection_token_index && - !can_advance_for_accepted_token && - (characters.intersects(separator_start_characters) || - (characters.intersects(current_conflict_detection_following_characters) && - grammar.variables[accept_action.symbol.index].is_string))) { - current_conflict_value = true; + if (conflict_detection_mode) { + bool next_item_set_can_yield_this_token = false; + for (const LexItem &item : transition.destination.entries) { + if (item.lhs == accept_action.symbol) { + next_item_set_can_yield_this_token = true; + } else if (!prefer_advancing && !transition.in_main_token) { + conflict_status_by_token[item.lhs.index] = true; + } + } + + if (prefer_advancing && + !next_item_set_can_yield_this_token && + (characters.intersects(following_characters_by_token[accept_action.symbol.index]) || + characters.intersects(separator_start_characters))) { + conflict_status_by_token[accept_action.symbol.index] = true; + } } if (!prefer_advancing) continue; @@ -226,10 +263,15 @@ class LexTableBuilderImpl : public LexTableBuilder { item.lhs.is_built_in() || grammar.variables[item.lhs.index].is_string); AcceptTokenAction &existing_action = lex_table.states[state_id].accept_action; - if (!existing_action.is_present() || - conflict_manager.resolve(action, existing_action)) { - lex_table.states[state_id].accept_action = action; + if (existing_action.is_present()) { + if (conflict_manager.resolve(action, existing_action)) { + conflict_status_by_token[existing_action.symbol.index] = true; + } else { + conflict_status_by_token[action.symbol.index] = true; + continue; + } } + lex_table.states[state_id].accept_action = action; } } } @@ -262,6 +304,39 @@ class LexTableBuilderImpl : public LexTableBuilder { } } + bool merge_token_set(LookaheadSet *left, const LookaheadSet &right) const { + bool is_compatible = true; + + left->for_each([&](Symbol left_symbol) { + if (left_symbol.is_terminal() && !left_symbol.is_built_in() && !right.contains(left_symbol)) { + right.for_each([&](Symbol right_symbol) { + if (shadowed_tokens_by_token[left_symbol.index].count(right_symbol) || + !coincident_tokens_by_token[left_symbol.index].contains(right_symbol)) { + is_compatible = false; + return; + } + }); + } + if (!is_compatible) return; + }); + + right.for_each([&](Symbol right_symbol) { + if (right_symbol.is_terminal() && !right_symbol.is_built_in() && !left->contains(right_symbol)) { + left->for_each([&](Symbol left_symbol) { + if (shadowed_tokens_by_token[right_symbol.index].count(left_symbol) || + !coincident_tokens_by_token[right_symbol.index].contains(left_symbol)) { + is_compatible = false; + return; + } + }); + } + if (!is_compatible) return; + }); + + if (is_compatible) left->insert_all(right); + return is_compatible; + } + void remove_duplicate_lex_states(ParseTable *parse_table) { for (LexState &state : lex_table.states) { state.accept_action.is_string = false; @@ -329,10 +404,9 @@ class LexTableBuilderImpl : public LexTableBuilder { } } - LexItemSet item_set_for_terminals(const map &terminals) { + LexItemSet item_set_for_terminals(const LookaheadSet &terminals) { LexItemSet result; - for (const auto &pair : terminals) { - Symbol symbol = pair.first; + terminals.for_each([&](Symbol symbol) { if (symbol.is_terminal()) { for (const auto &rule : rules_for_symbol(symbol)) { for (const auto &separator_rule : separator_rules) { @@ -348,7 +422,7 @@ class LexTableBuilderImpl : public LexTableBuilder { } } } - } + }); return result; } @@ -369,17 +443,24 @@ class LexTableBuilderImpl : public LexTableBuilder { } }; -unique_ptr LexTableBuilder::create(const LexicalGrammar &grammar) { - return unique_ptr(new LexTableBuilderImpl(grammar)); +unique_ptr LexTableBuilder::create(const SyntaxGrammar &syntax_grammar, + const LexicalGrammar &lexical_grammar, + const vector &following_tokens, + const vector &coincident_tokens) { + return unique_ptr(new LexTableBuilderImpl( + syntax_grammar, + lexical_grammar, + following_tokens, + coincident_tokens + )); } LexTable LexTableBuilder::build(ParseTable *parse_table) { return static_cast(this)->build(parse_table); } -bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right, - const vector> &following_terminals) { - return static_cast(this)->detect_conflict(left, right, following_terminals); +const set &LexTableBuilder::get_incompatible_tokens(Symbol::Index token) const { + return static_cast(this)->get_incompatible_tokens(token); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_table_builder.h b/src/compiler/build_tables/lex_table_builder.h index 3b896bb7..af36c1a2 100644 --- a/src/compiler/build_tables/lex_table_builder.h +++ b/src/compiler/build_tables/lex_table_builder.h @@ -9,19 +9,22 @@ namespace tree_sitter { struct ParseTable; +struct SyntaxGrammar; struct LexicalGrammar; namespace build_tables { +class LookaheadSet; + class LexTableBuilder { public: - static std::unique_ptr create(const LexicalGrammar &); + static std::unique_ptr create(const SyntaxGrammar &, + const LexicalGrammar &, + const std::vector &, + const std::vector &); LexTable build(ParseTable *); - bool detect_conflict( - rules::Symbol::Index, - rules::Symbol::Index, - const std::vector> &following_terminals_by_terminal_index - ); + const std::set &get_incompatible_tokens(rules::Symbol::Index) const; + protected: LexTableBuilder() = default; }; diff --git a/src/compiler/build_tables/parse_table_builder.cc b/src/compiler/build_tables/parse_table_builder.cc index 7e67b650..8666ce1d 100644 --- a/src/compiler/build_tables/parse_table_builder.cc +++ b/src/compiler/build_tables/parse_table_builder.cc @@ -19,9 +19,10 @@ namespace build_tables { using std::deque; using std::find; -using std::pair; using std::vector; using std::set; +using std::tuple; +using std::make_tuple; using std::map; using std::move; using std::string; @@ -43,32 +44,38 @@ struct ParseStateQueueEntry { class ParseTableBuilderImpl : public ParseTableBuilder { const SyntaxGrammar grammar; const LexicalGrammar lexical_grammar; - unordered_map recovery_item_sets_by_lookahead; unordered_map state_ids_by_item_set; vector item_sets_by_state_id; deque parse_state_queue; ParseTable parse_table; ParseItemSetBuilder item_set_builder; - LexTableBuilder *lex_table_builder; + unique_ptr lex_table_builder; set fragile_reductions; - vector> incompatible_tokens_by_token_index; - vector> following_tokens_by_token_index; - bool processing_recovery_states; + vector following_tokens_by_token; + vector coincident_tokens_by_token; public: - ParseTableBuilderImpl( - const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar, - LexTableBuilder *lex_table_builder - ) : grammar(syntax_grammar), + ParseTableBuilderImpl(const SyntaxGrammar &syntax_grammar, const LexicalGrammar &lexical_grammar) + : grammar(syntax_grammar), lexical_grammar(lexical_grammar), item_set_builder(syntax_grammar, lexical_grammar), - lex_table_builder(lex_table_builder), - incompatible_tokens_by_token_index(lexical_grammar.variables.size()), - following_tokens_by_token_index(lexical_grammar.variables.size()), - processing_recovery_states(false) {} + following_tokens_by_token(lexical_grammar.variables.size()), + coincident_tokens_by_token(lexical_grammar.variables.size()) { - pair build() { + for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) { + coincident_tokens_by_token[i].insert(rules::END_OF_INPUT()); + if (lexical_grammar.variables[i].is_string) { + for (unsigned j = 0; j < i; j++) { + if (lexical_grammar.variables[j].is_string) { + coincident_tokens_by_token[i].insert(Symbol::terminal(j)); + coincident_tokens_by_token[j].insert(Symbol::terminal(i)); + } + } + } + } + } + + tuple build() { // Ensure that the empty rename sequence has index 0. parse_table.alias_sequences.push_back({}); @@ -90,17 +97,21 @@ class ParseTableBuilderImpl : public ParseTableBuilder { }}); CompileError error = process_part_state_queue(); - if (error) return {parse_table, error}; + if (error) return make_tuple(parse_table, LexTable(), error); - compute_unmergable_token_pairs(); + lex_table_builder = LexTableBuilder::create( + grammar, + lexical_grammar, + following_tokens_by_token, + coincident_tokens_by_token + ); - processing_recovery_states = true; build_error_parse_state(error_state_id); - process_part_state_queue(); - mark_fragile_actions(); remove_duplicate_parse_states(); - return {parse_table, CompileError::none()}; + + auto lex_table = lex_table_builder->build(&parse_table); + return make_tuple(parse_table, lex_table, CompileError::none()); } private: @@ -125,54 +136,35 @@ class ParseTableBuilderImpl : public ParseTableBuilder { } void build_error_parse_state(ParseStateId state_id) { - ParseState error_state; - for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) { Symbol token = Symbol::terminal(i); - bool has_non_reciprocal_conflict = false; + const LexicalVariable &variable = lexical_grammar.variables[i]; - for (Symbol incompatible_token : incompatible_tokens_by_token_index[i]) { - if (incompatible_token.is_terminal() && - !incompatible_tokens_by_token_index[incompatible_token.index].count(token)) { - has_non_reciprocal_conflict = true; + bool exclude_from_recovery_state = false; + for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(i)) { + if (!coincident_tokens_by_token[i].contains(incompatible_token) && + ((lexical_grammar.variables[incompatible_token.index].is_string && !variable.is_string) || + !lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token))) { + exclude_from_recovery_state = true; break; } } - - if (!has_non_reciprocal_conflict) { - add_out_of_context_parse_state(&error_state, Symbol::terminal(i)); + if (!exclude_from_recovery_state) { + parse_table.add_terminal_action(state_id, Symbol::terminal(i), ParseAction::Recover()); } } for (const Symbol &symbol : grammar.extra_tokens) { - if (!error_state.terminal_entries.count(symbol)) { - error_state.terminal_entries[symbol].actions.push_back(ParseAction::ShiftExtra()); + if (!parse_table.states[state_id].terminal_entries.count(symbol)) { + parse_table.add_terminal_action(state_id, symbol, ParseAction::ShiftExtra()); } } for (size_t i = 0; i < grammar.external_tokens.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol::external(i)); + parse_table.states[state_id].terminal_entries[Symbol::external(i)].actions.push_back(ParseAction::Recover()); } - for (size_t i = 0; i < grammar.variables.size(); i++) { - add_out_of_context_parse_state(&error_state, Symbol::non_terminal(i)); - } - - error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0)); - parse_table.states[state_id] = error_state; - } - - void add_out_of_context_parse_state(ParseState *error_state, - const rules::Symbol &symbol) { - const ParseItemSet &item_set = recovery_item_sets_by_lookahead[symbol]; - if (!item_set.entries.empty()) { - ParseStateId state = add_parse_state({}, item_set); - if (symbol.is_non_terminal()) { - error_state->nonterminal_entries[symbol.index] = state; - } else { - error_state->terminal_entries[symbol].actions.assign({ ParseAction::Recover(state) }); - } - } + parse_table.add_terminal_action(state_id, END_OF_INPUT(), ParseAction::Recover()); } ParseStateId add_parse_state(SymbolSequence &&preceding_symbols, const ParseItemSet &item_set) { @@ -224,7 +216,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { parse_table.add_terminal_action(state_id, lookahead, action); } else { ParseAction &existing_action = entry.actions[0]; - if (existing_action.type == ParseActionTypeAccept || processing_recovery_states) { + if (existing_action.type == ParseActionTypeAccept) { entry.actions.push_back(action); } else { if (action.precedence > existing_action.precedence) { @@ -264,11 +256,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseItemSet &next_item_set = pair.second; ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set); - if (!processing_recovery_states) { - recovery_item_sets_by_lookahead[lookahead].add(next_item_set); - if (!parse_table.states[state_id].terminal_entries[lookahead].actions.empty()) { - lookaheads_with_conflicts.insert(lookahead); - } + if (!parse_table.states[state_id].terminal_entries[lookahead].actions.empty()) { + lookaheads_with_conflicts.insert(lookahead); } parse_table.add_terminal_action(state_id, lookahead, ParseAction::Shift(next_state_id)); @@ -280,9 +269,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseItemSet &next_item_set = pair.second; ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set); parse_table.set_nonterminal_action(state_id, lookahead.index, next_state_id); - if (!processing_recovery_states) { - recovery_item_sets_by_lookahead[lookahead].add(next_item_set); - } } for (Symbol lookahead : lookaheads_with_conflicts) { @@ -293,12 +279,21 @@ class ParseTableBuilderImpl : public ParseTableBuilder { ParseAction shift_extra = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) { - if (!state.terminal_entries.count(extra_symbol) || - state.has_shift_action() || processing_recovery_states) { + if (!state.terminal_entries.count(extra_symbol) || state.has_shift_action()) { parse_table.add_terminal_action(state_id, extra_symbol, shift_extra); } } + auto &terminals = state.terminal_entries; + for (auto iter = terminals.begin(), end = terminals.end(); iter != end; ++iter) { + if (iter->first.is_built_in() || iter->first.is_external()) continue; + for (auto other_iter = terminals.begin(); other_iter != iter; ++other_iter) { + if (other_iter->first.is_built_in() || other_iter->first.is_external()) continue; + coincident_tokens_by_token[iter->first.index].insert(other_iter->first); + coincident_tokens_by_token[other_iter->first.index].insert(iter->first); + } + } + return ""; } @@ -355,28 +350,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder { return false; } - void compute_unmergable_token_pairs() { - for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) { - Symbol token = Symbol::terminal(i); - auto &incompatible_indices = incompatible_tokens_by_token_index[i]; - - for (unsigned j = 0; j < n; j++) { - if (i == j) continue; - if (lex_table_builder->detect_conflict(i, j, following_tokens_by_token_index)) { - incompatible_indices.insert(Symbol::terminal(j)); - } - } - - for (const ExternalToken &external_token : grammar.external_tokens) { - if (external_token.corresponding_internal_token == token) { - for (unsigned j = 0; j < grammar.external_tokens.size(); j++) { - incompatible_indices.insert(Symbol::external(j)); - } - } - } - } - } - void remove_duplicate_parse_states() { unordered_map> state_indices_by_signature; @@ -474,7 +447,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (left_entry.second.actions.back().type != ParseActionTypeReduce) return false; if (!has_actions(right_state, left_entry.second)) return false; if (!lookahead.is_built_in()) { - for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) { + for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) { if (right_state.terminal_entries.count(incompatible_token)) return false; } } @@ -492,7 +465,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (right_entry.second.actions.back().type != ParseActionTypeReduce) return false; if (!has_actions(left_state, right_entry.second)) return false; if (!lookahead.is_built_in()) { - for (const Symbol &incompatible_token : incompatible_tokens_by_token_index[lookahead.index]) { + for (const Symbol &incompatible_token : lex_table_builder->get_incompatible_tokens(lookahead.index)) { if (left_state.terminal_entries.count(incompatible_token)) return false; } } @@ -788,7 +761,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder { if (left_symbol.is_terminal() && !left_symbol.is_built_in()) { right_tokens.for_each([&](Symbol right_symbol) { if (right_symbol.is_terminal() && !right_symbol.is_built_in()) { - following_tokens_by_token_index[left_symbol.index].insert(right_symbol.index); + following_tokens_by_token[left_symbol.index].insert(right_symbol); } }); } @@ -805,15 +778,12 @@ class ParseTableBuilderImpl : public ParseTableBuilder { unique_ptr ParseTableBuilder::create( const SyntaxGrammar &syntax_grammar, - const LexicalGrammar &lexical_grammar, - LexTableBuilder *lex_table_builder + const LexicalGrammar &lexical_grammar ) { - return unique_ptr( - new ParseTableBuilderImpl(syntax_grammar, lexical_grammar, lex_table_builder) - ); + return unique_ptr(new ParseTableBuilderImpl(syntax_grammar, lexical_grammar)); } -pair ParseTableBuilder::build() { +tuple ParseTableBuilder::build() { return static_cast(this)->build(); } diff --git a/src/compiler/build_tables/parse_table_builder.h b/src/compiler/build_tables/parse_table_builder.h index bab96243..1cbecb49 100644 --- a/src/compiler/build_tables/parse_table_builder.h +++ b/src/compiler/build_tables/parse_table_builder.h @@ -8,21 +8,17 @@ namespace tree_sitter { struct ParseTable; +struct LexTable; struct SyntaxGrammar; struct LexicalGrammar; namespace build_tables { -class LexTableBuilder; - class ParseTableBuilder { public: - static std::unique_ptr create( - const SyntaxGrammar &, - const LexicalGrammar &, - LexTableBuilder * - ); - std::pair build(); + static std::unique_ptr create(const SyntaxGrammar &, const LexicalGrammar &); + std::tuple build(); + protected: ParseTableBuilder() = default; }; diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 3edcf141..ad3a64cb 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -1,6 +1,6 @@ #include "tree_sitter/compiler.h" #include "compiler/prepare_grammar/prepare_grammar.h" -#include "compiler/build_tables/build_tables.h" +#include "compiler/build_tables/parse_table_builder.h" #include "compiler/generate_code/c_code.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" @@ -30,8 +30,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { return { nullptr, strdup(error.message.c_str()), error.type }; } - auto table_build_result = - build_tables::build_tables(syntax_grammar, lexical_grammar); + auto builder = build_tables::ParseTableBuilder::create(syntax_grammar, lexical_grammar); + auto table_build_result = builder->build(); const ParseTable &parse_table = get<0>(table_build_result); const LexTable &lex_table = get<1>(table_build_result); error = get<2>(table_build_result); @@ -45,25 +45,4 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) { return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone }; } -pair compile(const InputGrammar &grammar, - std::string name) { - auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar); - const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result); - const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result); - CompileError error = get<2>(prepare_grammar_result); - if (error.type) return { "", error }; - - auto table_build_result = - build_tables::build_tables(syntax_grammar, lexical_grammar); - const ParseTable &parse_table = get<0>(table_build_result); - const LexTable &lex_table = get<1>(table_build_result); - error = get<2>(table_build_result); - if (error.type) return { "", error }; - - string code = generate_code::c_code(name, parse_table, lex_table, - syntax_grammar, lexical_grammar); - - return { code, CompileError::none() }; -} - } // namespace tree_sitter diff --git a/src/compiler/compile.h b/src/compiler/compile.h deleted file mode 100644 index 5f182bc0..00000000 --- a/src/compiler/compile.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef COMPILER_COMPILE_H_ -#define COMPILER_COMPILE_H_ - -#include -#include -#include "compiler/compile_error.h" - -namespace tree_sitter { - -struct InputGrammar; - -std::pair compile(const InputGrammar &, std::string); - -} // namespace tree_sitter - -#endif // COMPILER_COMPILE_H_ diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index cbc656a8..d11b3084 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -656,7 +656,7 @@ class CCodeGenerator { add(")"); break; case ParseActionTypeRecover: - add("RECOVER(" + to_string(action.state_index) + ")"); + add("RECOVER()"); break; default: {} } diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 9317c818..6de0792d 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -16,6 +16,7 @@ struct AdvanceAction { AdvanceAction(); AdvanceAction(size_t, PrecedenceRange, bool); bool operator==(const AdvanceAction &other) const; + inline bool operator!=(const AdvanceAction &other) const { return !operator==(other); } LexStateId state_index; PrecedenceRange precedence_range; @@ -26,7 +27,8 @@ struct AcceptTokenAction { AcceptTokenAction(); AcceptTokenAction(rules::Symbol, int, bool); bool is_present() const; - bool operator==(const AcceptTokenAction &action) const; + bool operator==(const AcceptTokenAction &other) const; + inline bool operator!=(const AcceptTokenAction &other) const { return !operator==(other); } rules::Symbol symbol; int precedence; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index e79e02dd..4d10907c 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -40,10 +40,9 @@ ParseAction ParseAction::Shift(ParseStateId state_index) { return result; } -ParseAction ParseAction::Recover(ParseStateId state_index) { +ParseAction ParseAction::Recover() { ParseAction result; result.type = ParseActionTypeRecover; - result.state_index = state_index; return result; } @@ -133,7 +132,7 @@ bool ParseState::has_shift_action() const { void ParseState::each_referenced_state(function fn) { for (auto &entry : terminal_entries) for (ParseAction &action : entry.second.actions) - if ((action.type == ParseActionTypeShift && !action.extra) || action.type == ParseActionTypeRecover) + if (action.type == ParseActionTypeShift && !action.extra) fn(&action.state_index); for (auto &entry : nonterminal_entries) fn(&entry.second); diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 40a44dfe..39e0080b 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -27,7 +27,7 @@ struct ParseAction { static ParseAction Accept(); static ParseAction Error(); static ParseAction Shift(ParseStateId state_index); - static ParseAction Recover(ParseStateId state_index); + static ParseAction Recover(); static ParseAction Reduce(rules::Symbol symbol, size_t child_count, int precedence, int dynamic_precedence, rules::Associativity, unsigned alias_sequence_id); diff --git a/src/runtime/error_costs.c b/src/runtime/error_costs.c deleted file mode 100644 index ac055f45..00000000 --- a/src/runtime/error_costs.c +++ /dev/null @@ -1,46 +0,0 @@ -#include "runtime/error_costs.h" - -static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -static const unsigned MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE = 24; - -ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_mergeable) { - if (a.count < b.count) { - if (are_mergeable || - a.cost <= b.cost || - a.count + 1 < b.count || - b.push_count > MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE) { - return ErrorComparisonTakeLeft; - } else { - return ErrorComparisonPreferLeft; - } - } - - if (b.count < a.count) { - if (are_mergeable || - b.cost <= a.cost || - b.count + 1 < a.count || - a.push_count > MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE) { - return ErrorComparisonTakeRight; - } else { - return ErrorComparisonPreferRight; - } - } - - if (a.cost < b.cost) { - if (are_mergeable || (b.cost - a.cost) * (1 + a.push_count) > MAX_COST_DIFFERENCE) { - return ErrorComparisonTakeLeft; - } else { - return ErrorComparisonPreferLeft; - } - } - - if (b.cost < a.cost) { - if (are_mergeable || (a.cost - b.cost) * (1 + b.push_count) > MAX_COST_DIFFERENCE) { - return ErrorComparisonTakeRight; - } else { - return ErrorComparisonPreferRight; - } - } - - return ErrorComparisonNone; -} diff --git a/src/runtime/error_costs.h b/src/runtime/error_costs.h index f65b9c93..60119aa1 100644 --- a/src/runtime/error_costs.h +++ b/src/runtime/error_costs.h @@ -1,36 +1,9 @@ #ifndef RUNTIME_ERROR_COSTS_H_ #define RUNTIME_ERROR_COSTS_H_ -#include - -#ifdef __cplusplus -extern "C" { -#endif - #define ERROR_STATE 0 #define ERROR_COST_PER_SKIPPED_TREE 100 #define ERROR_COST_PER_SKIPPED_LINE 30 #define ERROR_COST_PER_SKIPPED_CHAR 1 -typedef struct { - unsigned count; - unsigned cost; - unsigned push_count; - unsigned depth; -} ErrorStatus; - -typedef enum { - ErrorComparisonTakeLeft, - ErrorComparisonPreferLeft, - ErrorComparisonNone, - ErrorComparisonPreferRight, - ErrorComparisonTakeRight, -} ErrorComparison; - -ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool can_merge); - -#ifdef __cplusplus -} -#endif - #endif diff --git a/src/runtime/language.c b/src/runtime/language.c index 8f6c37ac..cb4e7383 100644 --- a/src/runtime/language.c +++ b/src/runtime/language.c @@ -2,33 +2,22 @@ #include "runtime/tree.h" #include "runtime/error_costs.h" -static const TSParseAction SHIFT_ERROR = { - .type = TSParseActionTypeShift, - .params.state = ERROR_STATE, -}; - void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result) { - uint32_t action_index; if (symbol == ts_builtin_sym_error) { - if (state == ERROR_STATE) { - result->action_count = 1; - result->is_reusable = false; - result->depends_on_lookahead = false; - result->actions = &SHIFT_ERROR; - return; - } - action_index = 0; + result->action_count = 0; + result->is_reusable = false; + result->actions = NULL; + return; } else { assert(symbol < self->token_count); - action_index = self->parse_table[state * self->symbol_count + symbol]; + uint32_t action_index = self->parse_table[state * self->symbol_count + symbol]; + const TSParseActionEntry *entry = &self->parse_actions[action_index]; + result->action_count = entry->count; + result->is_reusable = entry->reusable; + result->depends_on_lookahead = entry->depends_on_lookahead; + result->actions = (const TSParseAction *)(entry + 1); } - - const TSParseActionEntry *entry = &self->parse_actions[action_index]; - result->action_count = entry->count; - result->is_reusable = entry->reusable; - result->depends_on_lookahead = entry->depends_on_lookahead; - result->actions = (const TSParseAction *)(entry + 1); } uint32_t ts_language_symbol_count(const TSLanguage *language) { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 3f1765f1..640baf2a 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -34,24 +34,23 @@ #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) -static const uint32_t MAX_VERSION_COUNT = 10; -static const uint32_t MAX_PRECEDING_TREES_TO_SKIP = 32; +static const unsigned MAX_VERSION_COUNT = 6; +static const unsigned MAX_SUMMARY_DEPTH = 16; +static const int MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; typedef struct { - Parser *parser; - TSSymbol lookahead_symbol; - TreeArray *trees_above_error; - uint32_t tree_count_above_error; - bool found_repair; - ReduceAction best_repair; - TSStateId best_repair_next_state; - uint32_t best_repair_skip_count; -} ErrorRepairSession; + unsigned cost; + unsigned push_count; + bool is_in_error; +} ErrorStatus; -typedef struct { - Parser *parser; - TSSymbol lookahead_symbol; -} SkipPrecedingTreesSession; +typedef enum { + ErrorComparisonTakeLeft, + ErrorComparisonPreferLeft, + ErrorComparisonNone, + ErrorComparisonPreferRight, + ErrorComparisonTakeRight, +} ErrorComparison; static void parser__log(Parser *self) { if (self->lexer.logger.log) { @@ -110,8 +109,8 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) { LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol)); LOG_STACK(); - ts_stack_decrease_push_count(self->stack, slice.version, - parent->child_count + 1); + ts_stack_decrease_push_count(self->stack, slice.version, parent->child_count + 1); + ts_tree_release(parent); array_delete(&slice.trees); } @@ -136,10 +135,72 @@ static void parser__breakdown_lookahead(Parser *self, Tree **lookahead, } } -static bool parser__condense_stack(Parser *self) { - bool all_versions_have_error = true; - unsigned old_version_count = ts_stack_version_count(self->stack); +static ErrorComparison parser__compare_versions(Parser *self, ErrorStatus a, ErrorStatus b) { + if (!a.is_in_error && b.is_in_error) { + if (a.cost < b.cost) { + return ErrorComparisonTakeLeft; + } else { + return ErrorComparisonPreferLeft; + } + } + if (a.is_in_error && !b.is_in_error) { + if (b.cost < a.cost) { + return ErrorComparisonTakeRight; + } else { + return ErrorComparisonPreferRight; + } + } + + if (a.cost < b.cost) { + if ((b.cost - a.cost) * (1 + a.push_count) > MAX_COST_DIFFERENCE) { + return ErrorComparisonTakeLeft; + } else { + return ErrorComparisonPreferLeft; + } + } + + if (b.cost < a.cost) { + if ((a.cost - b.cost) * (1 + b.push_count) > MAX_COST_DIFFERENCE) { + return ErrorComparisonTakeRight; + } else { + return ErrorComparisonPreferRight; + } + } + + return ErrorComparisonNone; +} + +static bool parser__better_version_exists(Parser *self, StackVersion version, + bool is_in_error, unsigned cost) { + if (self->finished_tree && self->finished_tree->error_cost <= cost) return true; + + ErrorStatus status = {.cost = cost, .is_in_error = is_in_error, .push_count = 0}; + + for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { + if (i == version || ts_stack_is_halted(self->stack, i)) continue; + ErrorStatus status_i = { + .cost = ts_stack_error_cost(self->stack, i), + .is_in_error = ts_stack_top_state(self->stack, i) == ERROR_STATE, + .push_count = ts_stack_push_count(self->stack, i) + }; + switch (parser__compare_versions(self, status, status_i)) { + case ErrorComparisonTakeRight: + return true; + case ErrorComparisonPreferRight: + if (ts_stack_can_merge(self->stack, i, version)) return true; + default: + break; + } + } + + return false; +} + +static bool parser__condense_stack(Parser *self) { + bool made_changes = false; + unsigned min_error_cost = UINT_MAX; + bool all_versions_have_error = true; for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { if (ts_stack_is_halted(self->stack, i)) { ts_stack_remove_version(self->stack, i); @@ -147,35 +208,47 @@ static bool parser__condense_stack(Parser *self) { continue; } - ErrorStatus right_error_status = ts_stack_error_status(self->stack, i); - if (right_error_status.count == 0) all_versions_have_error = false; + ErrorStatus status_i = { + .cost = ts_stack_error_cost(self->stack, i), + .push_count = ts_stack_push_count(self->stack, i), + .is_in_error = ts_stack_top_state(self->stack, i) == ERROR_STATE, + }; + if (!status_i.is_in_error) all_versions_have_error = false; + if (status_i.cost < min_error_cost) min_error_cost = status_i.cost; for (StackVersion j = 0; j < i; j++) { - bool can_merge = ts_stack_can_merge(self->stack, i, j); - ErrorStatus left_error_status = ts_stack_error_status(self->stack, j); + ErrorStatus status_j = { + .cost = ts_stack_error_cost(self->stack, j), + .push_count = ts_stack_push_count(self->stack, j), + .is_in_error = ts_stack_top_state(self->stack, j) == ERROR_STATE, + }; - switch (error_status_compare(left_error_status, right_error_status, can_merge)) { + bool can_merge = ts_stack_can_merge(self->stack, j, i); + switch (parser__compare_versions(self, status_j, status_i)) { case ErrorComparisonTakeLeft: + made_changes = true; ts_stack_remove_version(self->stack, i); i--; j = i; break; - - case ErrorComparisonTakeRight: - ts_stack_remove_version(self->stack, j); - i--; - j--; - break; - case ErrorComparisonPreferLeft: if (can_merge) { + made_changes = true; + ts_stack_remove_version(self->stack, i); + i--; + j = i; + } + break; + case ErrorComparisonNone: + if (can_merge) { + made_changes = true; ts_stack_force_merge(self->stack, j, i); i--; j = i; } break; - case ErrorComparisonPreferRight: + made_changes = true; if (can_merge) { ts_stack_remove_version(self->stack, j); i--; @@ -185,12 +258,11 @@ static bool parser__condense_stack(Parser *self) { j = i; } break; - - case ErrorComparisonNone: - if (can_merge) { - ts_stack_force_merge(self->stack, j, i); - i--; - } + case ErrorComparisonTakeRight: + made_changes = true; + ts_stack_remove_version(self->stack, j); + i--; + j--; break; } } @@ -198,19 +270,20 @@ static bool parser__condense_stack(Parser *self) { while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); + made_changes = true; } - unsigned new_version_count = ts_stack_version_count(self->stack); - if (new_version_count != old_version_count) { + if (made_changes) { LOG("condense"); LOG_STACK(); } - return all_versions_have_error && new_version_count > 0; + return + (all_versions_have_error && ts_stack_version_count(self->stack) > 0) || + (self->finished_tree && self->finished_tree->error_cost < min_error_cost); } static void parser__restore_external_scanner(Parser *self, Tree *external_token) { - LOG("restore_external_scanner"); if (external_token) { self->language->external_scanner.deserialize( self->external_scanner_payload, @@ -222,8 +295,7 @@ static void parser__restore_external_scanner(Parser *self, Tree *external_token) } } -static Tree *parser__lex(Parser *self, StackVersion version) { - TSStateId parse_state = ts_stack_top_state(self->stack, version); +static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_state) { Length start_position = ts_stack_top_position(self->stack, version); Tree *external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; @@ -234,6 +306,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { bool found_external_token = false; bool skipped_error = false; + bool error_mode = parse_state == ERROR_STATE; int32_t first_error_character = 0; Length error_start_position, error_end_position; uint32_t last_byte_scanned = start_position.bytes; @@ -260,8 +333,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) { self->lexer.token_end_position = self->lexer.current_position; } - if (lex_mode.lex_state == ERROR_STATE && - self->lexer.token_end_position.bytes <= current_position.bytes) { + if (error_mode && self->lexer.token_end_position.bytes <= current_position.bytes) { LOG("disregard_empty_token"); } else { found_external_token = true; @@ -289,8 +361,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) { break; } - if (lex_mode.lex_state != self->language->lex_modes[ERROR_STATE].lex_state) { + if (!error_mode) { LOG("retry_in_error_mode"); + error_mode = true; lex_mode = self->language->lex_modes[ERROR_STATE]; valid_external_tokens = ts_language_enabled_external_tokens( self->language, @@ -462,7 +535,7 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId } } - result = parser__lex(self, version); + result = parser__lex(self, version, *state); parser__set_cached_token(self, position.bytes, last_external_token, result); ts_language_table_entry(self->language, *state, result->symbol, table_entry); return result; @@ -518,30 +591,6 @@ static bool parser__select_tree(Parser *self, Tree *left, Tree *right) { } } -static bool parser__better_version_exists(Parser *self, StackVersion version, - ErrorStatus my_error_status) { - if (self->finished_tree && self->finished_tree->error_cost <= my_error_status.cost) return true; - - for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { - if (i == version || ts_stack_is_halted(self->stack, i)) continue; - - switch (error_status_compare(my_error_status, - ts_stack_error_status(self->stack, i), - ts_stack_can_merge(self->stack, i, version))) { - case ErrorComparisonTakeLeft: - LOG("halt_other version:%u", i); - ts_stack_halt(self->stack, i); - break; - case ErrorComparisonTakeRight: - if (i < version) return true; - default: - break; - } - } - - return false; -} - static void parser__shift(Parser *self, StackVersion version, TSStateId state, Tree *lookahead, bool extra) { if (extra != lookahead->extra) { @@ -579,14 +628,12 @@ static bool parser__replace_children(Parser *self, Tree *tree, Tree **children, } } -static StackPopResult parser__reduce(Parser *self, StackVersion version, - TSSymbol symbol, uint32_t count, - int dynamic_precedence, uint16_t alias_sequence_id, - bool fragile, bool allow_skipping) { +static StackPopResult parser__reduce(Parser *self, StackVersion version, TSSymbol symbol, + uint32_t count, int dynamic_precedence, + uint16_t alias_sequence_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); StackPopResult pop = ts_stack_pop_count(self->stack, version, count); - if (pop.stopped_at_error) return pop; for (uint32_t i = 0; i < pop.slices.size; i++) { StackSlice slice = pop.slices.contents[i]; @@ -638,24 +685,6 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, parent->parse_state = state; } - // If this pop operation terminated at the end of an error region, then - // create two stack versions: one in which the parent node is interpreted - // normally, and one in which the parent node is skipped. - if (state == ERROR_STATE && allow_skipping && child_count > 1) { - StackVersion other_version = ts_stack_copy_version(self->stack, slice.version); - - ts_stack_push(self->stack, other_version, parent, false, ERROR_STATE); - for (uint32_t j = parent->child_count; j < slice.trees.size; j++) { - Tree *tree = slice.trees.contents[j]; - ts_stack_push(self->stack, other_version, tree, false, ERROR_STATE); - } - - ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); - if (parser__better_version_exists(self, version, error_status)) { - ts_stack_remove_version(self->stack, other_version); - } - } - // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. ts_stack_push(self->stack, slice.version, parent, false, next_state); @@ -679,211 +708,6 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version, return pop; } -static const TSParseAction *parser__reductions_after_sequence(Parser *self, - TSStateId start_state, - const TreeArray *trees_below, - uint32_t tree_count_below, - const TreeArray *trees_above, - TSSymbol lookahead_symbol, - uint32_t *count) { - TSStateId state = start_state; - uint32_t child_count = 0; - *count = 0; - - for (uint32_t i = 0; i < trees_below->size; i++) { - if (child_count == tree_count_below) - break; - Tree *tree = trees_below->contents[trees_below->size - 1 - i]; - if (tree->extra) continue; - TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); - if (next_state == ERROR_STATE) - return NULL; - if (next_state != state) { - child_count++; - state = next_state; - } - } - - for (uint32_t i = 0; i < trees_above->size; i++) { - Tree *tree = trees_above->contents[i]; - if (tree->extra) continue; - TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol); - if (next_state == ERROR_STATE) - return NULL; - if (next_state != state) { - child_count++; - state = next_state; - } - } - - const TSParseAction *actions = - ts_language_actions(self->language, state, lookahead_symbol, count); - - if (*count > 0 && actions[*count - 1].type != TSParseActionTypeReduce) { - (*count)--; - } - - while (*count > 0 && actions[0].params.child_count < child_count) { - actions++; - (*count)--; - } - - while (*count > 0 && actions[*count - 1].params.child_count > child_count) { - (*count)--; - } - - return actions; -} - -static StackIterateAction parser__repair_error_callback(void *payload, TSStateId state, - const TreeArray *trees, - uint32_t tree_count) { - ErrorRepairSession *session = payload; - Parser *self = session->parser; - TSSymbol lookahead_symbol = session->lookahead_symbol; - ReduceActionSet *repairs = &self->reduce_actions; - TreeArray *trees_above_error = session->trees_above_error; - uint32_t tree_count_above_error = session->tree_count_above_error; - - StackIterateAction result = StackIterateNone; - - uint32_t last_repair_count = -1; - uint32_t repair_reduction_count = 0; - const TSParseAction *repair_reductions = NULL; - - for (uint32_t i = 0; i < repairs->size; i++) { - ReduceAction *repair = &repairs->contents[i]; - uint32_t count_needed_below_error = repair->count - tree_count_above_error; - if (count_needed_below_error > tree_count) - break; - - uint32_t skip_count = tree_count - count_needed_below_error; - if (session->found_repair && skip_count >= session->best_repair_skip_count) { - array_erase(repairs, i--); - continue; - } - - TSStateId state_after_repair = ts_language_next_state(self->language, state, repair->symbol); - if (state == ERROR_STATE || state_after_repair == ERROR_STATE) - continue; - - uint32_t action_count; - ts_language_actions(self->language, state_after_repair, lookahead_symbol, &action_count); - if (action_count == 0) - continue; - - if (count_needed_below_error != last_repair_count) { - last_repair_count = count_needed_below_error; - repair_reductions = parser__reductions_after_sequence( - self, state, trees, count_needed_below_error, trees_above_error, - lookahead_symbol, &repair_reduction_count); - } - - for (uint32_t j = 0; j < repair_reduction_count; j++) { - if (repair_reductions[j].params.symbol == repair->symbol) { - result |= StackIteratePop; - session->found_repair = true; - session->best_repair = *repair; - session->best_repair_skip_count = skip_count; - session->best_repair_next_state = state_after_repair; - array_erase(repairs, i--); - break; - } - } - } - - if (repairs->size == 0) - result |= StackIterateStop; - - return result; -} - -static bool parser__repair_error(Parser *self, StackSlice slice, - TSSymbol lookahead_symbol, TableEntry entry) { - LOG("repair_error"); - ErrorRepairSession session = { - .parser = self, - .lookahead_symbol = lookahead_symbol, - .found_repair = false, - .trees_above_error = &slice.trees, - .tree_count_above_error = ts_tree_array_essential_count(&slice.trees), - }; - - array_clear(&self->reduce_actions); - for (uint32_t i = 0; i < entry.action_count; i++) { - TSParseAction action = entry.actions[i]; - if (action.type == TSParseActionTypeReduce) { - TSSymbol symbol = action.params.symbol; - uint32_t child_count = action.params.child_count; - if ((child_count > session.tree_count_above_error) || - (child_count == session.tree_count_above_error && - !ts_language_symbol_metadata(self->language, symbol).visible)) - array_push(&self->reduce_actions, ((ReduceAction){ - .symbol = symbol, - .count = child_count, - .alias_sequence_id = action.params.alias_sequence_id, - })); - } - } - - StackPopResult pop = ts_stack_iterate( - self->stack, slice.version, parser__repair_error_callback, &session); - - if (!session.found_repair) { - LOG("no_repair_found"); - ts_stack_remove_version(self->stack, slice.version); - ts_tree_array_delete(&slice.trees); - return false; - } - - ReduceAction repair = session.best_repair; - TSStateId next_state = session.best_repair_next_state; - uint32_t skip_count = session.best_repair_skip_count; - - StackSlice new_slice = array_pop(&pop.slices); - TreeArray children = new_slice.trees; - ts_stack_renumber_version(self->stack, new_slice.version, slice.version); - - for (uint32_t i = pop.slices.size - 1; i + 1 > 0; i--) { - StackSlice other_slice = pop.slices.contents[i]; - ts_tree_array_delete(&other_slice.trees); - if (other_slice.version != pop.slices.contents[i + 1].version) - ts_stack_remove_version(self->stack, other_slice.version); - } - - TreeArray skipped_children = ts_tree_array_remove_last_n(&children, skip_count); - TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&skipped_children); - Tree *error = ts_tree_make_error_node(&skipped_children, self->language); - error->extra = true; - array_push(&children, error); - array_push_all(&children, &trailing_extras); - trailing_extras.size = 0; - array_delete(&trailing_extras); - - for (uint32_t i = 0; i < slice.trees.size; i++) - array_push(&children, slice.trees.contents[i]); - array_delete(&slice.trees); - - Tree *parent = ts_tree_make_node( - repair.symbol, children.size, children.contents, - repair.alias_sequence_id, self->language - ); - ts_stack_push(self->stack, slice.version, parent, false, next_state); - ts_tree_release(parent); - ts_stack_decrease_push_count(self->stack, slice.version, error->child_count); - - ErrorStatus error_status = ts_stack_error_status(self->stack, slice.version); - if (parser__better_version_exists(self, slice.version, error_status)) { - LOG("no_better_repair_found"); - ts_stack_halt(self->stack, slice.version); - return false; - } else { - LOG("repair_found sym:%s, child_count:%u, cost:%u", SYM_NAME(repair.symbol), - repair.count, parent->error_cost); - return true; - } -} - static void parser__start(Parser *self, TSInput input, Tree *previous_tree) { if (previous_tree) { LOG("parse_after_edit"); @@ -985,18 +809,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) bool did_reduce = false; for (uint32_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; - StackPopResult reduction = parser__reduce( + parser__reduce( self, version, action.symbol, action.count, action.dynamic_precedence, action.alias_sequence_id, - true, false + true ); - if (reduction.stopped_at_error) { - ts_tree_array_delete(&reduction.slices.contents[0].trees); - ts_stack_remove_version(self->stack, reduction.slices.contents[0].version); - continue; - } else { - did_reduce = true; - } + did_reduce = true; } if (did_reduce) { @@ -1011,60 +829,11 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version) } } -static StackIterateAction parser__skip_preceding_trees_callback( - void *payload, TSStateId state, const TreeArray *trees, uint32_t tree_count) { - if (trees->size > MAX_PRECEDING_TREES_TO_SKIP) return StackIterateStop; - if (tree_count > 0 && state != ERROR_STATE) { - uint32_t bytes_skipped = 0; - for (uint32_t i = 0; i < trees->size; i++) { - bytes_skipped += ts_tree_total_bytes(trees->contents[i]); - } - if (bytes_skipped == 0) return StackIterateNone; - SkipPrecedingTreesSession *session = payload; - Parser *self = session->parser; - TSSymbol lookahead_symbol = session->lookahead_symbol; - uint32_t action_count; - const TSParseAction *actions = - ts_language_actions(self->language, state, lookahead_symbol, &action_count); - if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) { - return StackIteratePop | StackIterateStop; - } - } - return StackIterateNone; -} - -static bool parser__skip_preceding_trees(Parser *self, StackVersion version, - TSSymbol lookahead_symbol) { - SkipPrecedingTreesSession session = { self, lookahead_symbol }; - StackPopResult pop = ts_stack_iterate( - self->stack, version, parser__skip_preceding_trees_callback, &session); - - StackVersion previous_version = STACK_VERSION_NONE; - for (uint32_t i = 0; i < pop.slices.size; i++) { - StackSlice slice = pop.slices.contents[i]; - if (slice.version == previous_version) { - ts_tree_array_delete(&slice.trees); - continue; - } - - previous_version = slice.version; - Tree *error = ts_tree_make_error_node(&slice.trees, self->language); - error->extra = true; - TSStateId state = ts_stack_top_state(self->stack, slice.version); - ts_stack_push(self->stack, slice.version, error, false, state); - ts_tree_release(error); - } - - return pop.slices.size > 0; -} - -static void parser__handle_error(Parser *self, StackVersion version, - TSSymbol lookahead_symbol) { +static void parser__handle_error(Parser *self, StackVersion version, TSSymbol lookahead_symbol) { // If there are other stack versions that are clearly better than this one, // just halt this version. - ErrorStatus error_status = ts_stack_error_status(self->stack, version); - error_status.count++; - if (parser__better_version_exists(self, version, error_status)) { + unsigned new_cost = ts_stack_error_cost(self->stack, version) + ERROR_COST_PER_SKIPPED_TREE; + if (parser__better_version_exists(self, version, true, new_cost)) { ts_stack_halt(self->stack, version); LOG("bail_on_error"); return; @@ -1072,16 +841,6 @@ static void parser__handle_error(Parser *self, StackVersion version, LOG("handle_error"); - // If the current lookahead symbol would have been valid in some previous - // state on the stack, create one stack version that repairs the error - // immediately by simply skipping all of the trees that came after that state. - if (ts_stack_version_count(self->stack) < MAX_VERSION_COUNT) { - if (parser__skip_preceding_trees(self, version, lookahead_symbol)) { - LOG("skip_preceding_trees"); - LOG_STACK(); - } - } - // Perform any reductions that could have happened in this state, regardless // of the lookahead. uint32_t previous_version_count = ts_stack_version_count(self->stack); @@ -1102,6 +861,9 @@ static void parser__handle_error(Parser *self, StackVersion version, ts_stack_push(self->stack, previous_version_count, NULL, false, ERROR_STATE); ts_stack_force_merge(self->stack, version, previous_version_count); } + + ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); + LOG_STACK(); } static void parser__halt_parse(Parser *self) { @@ -1129,8 +891,93 @@ static void parser__halt_parse(Parser *self) { ts_tree_release(eof); } -static void parser__recover(Parser *self, StackVersion version, TSStateId state, - Tree *lookahead) { +static void parser__recover(Parser *self, StackVersion version, Tree *lookahead) { + bool did_recover = false; + unsigned previous_version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_top_position(self->stack, version); + StackSummary *summary = ts_stack_get_summary(self->stack, version); + for (unsigned i = 0; i < summary->size; i++) { + StackSummaryEntry entry = summary->contents[i]; + if (entry.state == ERROR_STATE) continue; + unsigned depth = entry.depth + ts_stack_depth_since_error(self->stack, version); + + unsigned new_cost = + depth * ERROR_COST_PER_SKIPPED_TREE + + (position.chars - entry.position.chars) * ERROR_COST_PER_SKIPPED_CHAR + + (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; + if (parser__better_version_exists(self, version, false, new_cost)) break; + + unsigned count = 0; + if (ts_language_actions(self->language, entry.state, lookahead->symbol, &count) && count > 0) { + LOG("recover state:%u, depth:%u", entry.state, depth); + StackPopResult pop = ts_stack_pop_count(self->stack, version, depth); + StackVersion previous_version = STACK_VERSION_NONE; + for (unsigned j = 0; j < pop.slices.size; j++) { + StackSlice slice = pop.slices.contents[j]; + if (slice.version == previous_version) { + ts_tree_array_delete(&slice.trees); + continue; + } + + if (ts_stack_top_state(self->stack, slice.version) != entry.state) { + ts_tree_array_delete(&slice.trees); + ts_stack_halt(self->stack, slice.version); + continue; + } + + StackPopResult error_pop = ts_stack_pop_error(self->stack, slice.version); + if (error_pop.slices.size > 0) { + StackSlice error_slice = error_pop.slices.contents[0]; + array_push_all(&error_slice.trees, &slice.trees); + array_delete(&slice.trees); + slice.trees = error_slice.trees; + ts_stack_renumber_version(self->stack, error_slice.version, slice.version); + } + + TreeArray trailing_extras = ts_tree_array_remove_trailing_extras(&slice.trees); + if (slice.trees.size > 0) { + Tree *error = ts_tree_make_error_node(&slice.trees, self->language); + error->extra = true; + ts_stack_push(self->stack, slice.version, error, false, entry.state); + ts_tree_release(error); + } else { + array_delete(&slice.trees); + } + previous_version = slice.version; + + for (unsigned k = 0; k < trailing_extras.size; k++) { + Tree *tree = trailing_extras.contents[k]; + ts_stack_push(self->stack, slice.version, tree, false, entry.state); + ts_tree_release(tree); + } + + array_delete(&trailing_extras); + did_recover = true; + } + break; + } + } + + for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { + if (ts_stack_is_halted(self->stack, i)) { + ts_stack_remove_version(self->stack, i); + i--; + } else { + for (unsigned j = 0; j < i; j++) { + if (ts_stack_can_merge(self->stack, j, i)) { + ts_stack_remove_version(self->stack, i); + i--; + break; + } + } + } + } + + if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { + ts_stack_halt(self->stack, version); + return; + } + if (lookahead->symbol == ts_builtin_sym_end) { LOG("recover_eof"); TreeArray children = array_new(); @@ -1141,20 +988,13 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state, return; } - LOG("recover state:%u", state); + LOG("skip_token symbol:%s", SYM_NAME(lookahead->symbol)); + bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra; + parser__shift(self, version, ERROR_STATE, lookahead, can_be_extra); - if (ts_stack_version_count(self->stack) < MAX_VERSION_COUNT) { - StackVersion new_version = ts_stack_copy_version(self->stack, version); - bool can_be_extra = ts_language_symbol_metadata(self->language, lookahead->symbol).extra; - parser__shift(self, new_version, ERROR_STATE, lookahead, can_be_extra); - - ErrorStatus error_status = ts_stack_error_status(self->stack, new_version); - if (parser__better_version_exists(self, version, error_status)) { - ts_stack_remove_version(self->stack, new_version); - } + if (parser__better_version_exists(self, version, true, ts_stack_error_cost(self->stack, version))) { + ts_stack_halt(self->stack, version); } - - parser__shift(self, version, state, lookahead, false); } static void parser__advance(Parser *self, StackVersion version, ReusableNode *reusable_node) { @@ -1163,7 +1003,6 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re Tree *lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry); for (;;) { - bool reduction_stopped_at_error = false; StackVersion last_reduction_version = STACK_VERSION_NONE; for (uint32_t i = 0; i < table_entry.action_count; i++) { @@ -1192,26 +1031,18 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re } case TSParseActionTypeReduce: { - if (reduction_stopped_at_error) continue; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackPopResult reduction = parser__reduce( self, version, action.params.symbol, action.params.child_count, action.params.dynamic_precedence, action.params.alias_sequence_id, - action.params.fragile, true + action.params.fragile ); StackSlice slice = *array_front(&reduction.slices); - if (reduction.stopped_at_error) { - reduction_stopped_at_error = true; - if (!parser__repair_error(self, slice, lookahead->first_leaf.symbol, table_entry)) { - break; - } - } last_reduction_version = slice.version; break; } case TSParseActionTypeAccept: { - if (ts_stack_error_status(self->stack, version).count > 0) continue; LOG("accept"); parser__accept(self, version, lookahead); ts_tree_release(lookahead); @@ -1220,13 +1051,9 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re case TSParseActionTypeRecover: { while (lookahead->child_count > 0) { - reusable_node_breakdown(reusable_node); - ts_tree_release(lookahead); - lookahead = reusable_node->tree; - ts_tree_retain(lookahead); + parser__breakdown_lookahead(self, &lookahead, state, reusable_node); } - - parser__recover(self, version, action.params.state, lookahead); + parser__recover(self, version, lookahead); if (lookahead == reusable_node->tree) reusable_node_pop(reusable_node); ts_tree_release(lookahead); return; @@ -1306,12 +1133,13 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err do { for (version = 0; version < ts_stack_version_count(self->stack); version++) { reusable_node = self->reusable_node; - last_position = position; while (!ts_stack_is_halted(self->stack, version)) { - position = ts_stack_top_position(self->stack, version).chars; - if (position > last_position || (version > 0 && position == last_position)) + position = ts_stack_top_position(self->stack, version).bytes; + if (position > last_position || (version > 0 && position == last_position)) { + last_position = position; break; + } LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), @@ -1326,10 +1154,14 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err self->reusable_node = reusable_node; - bool all_versions_have_error = parser__condense_stack(self); - if (halt_on_error && all_versions_have_error) { - parser__halt_parse(self); - break; + bool should_halt = parser__condense_stack(self); + if (should_halt) { + if (self->finished_tree) { + break; + } else if (halt_on_error) { + parser__halt_parse(self); + break; + } } self->in_ambiguity = version > 1; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index ce5f9a6d..986d8cb5 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -21,8 +21,6 @@ typedef struct StackNode StackNode; typedef struct { StackNode *node; Tree *tree; - uint32_t push_count; - uint32_t depth; bool is_pending; } StackLink; @@ -33,24 +31,16 @@ struct StackNode { short unsigned int link_count; uint32_t ref_count; unsigned error_cost; - unsigned error_count; + unsigned depth; }; typedef struct { StackNode *node; TreeArray trees; uint32_t tree_count; - uint32_t push_count; - uint32_t depth; bool is_pending; } Iterator; -typedef struct { - uint32_t goal_tree_count; - bool found_error; - bool found_valid_path; -} StackPopSession; - typedef struct { void *payload; StackIterateCallback callback; @@ -62,8 +52,8 @@ typedef struct { StackNode *node; Tree *last_external_token; uint32_t push_count; - uint32_t depth; bool is_halted; + StackSummary *summary; } StackHead; struct Stack { @@ -117,7 +107,7 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p StackNode *node = pool->size > 0 ? array_pop(pool) : ts_malloc(sizeof(StackNode)); - *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; + *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state, .depth = 0}; if (previous_node) { stack_node_retain(previous_node); @@ -127,30 +117,31 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p .node = previous_node, .tree = tree, .is_pending = is_pending, - .push_count = 0, - .depth = 0, }; node->position = previous_node->position; - node->error_count = previous_node->error_count; node->error_cost = previous_node->error_cost; if (tree) { + node->depth = previous_node->depth; + if (!tree->extra) node->depth++; ts_tree_retain(tree); node->error_cost += tree->error_cost; node->position = length_add(node->position, ts_tree_total_size(tree)); if (state == ERROR_STATE && !tree->extra) { node->error_cost += - ERROR_COST_PER_SKIPPED_TREE * (tree->visible ? 1 : tree->visible_child_count) + - ERROR_COST_PER_SKIPPED_CHAR * (tree->padding.chars + tree->size.chars) + - ERROR_COST_PER_SKIPPED_LINE * (tree->padding.extent.row + tree->size.extent.row); + ERROR_COST_PER_SKIPPED_TREE * ((tree->visible || tree->child_count == 0) ? 1 : tree->visible_child_count) + + ERROR_COST_PER_SKIPPED_CHAR * tree->size.chars + + ERROR_COST_PER_SKIPPED_LINE * tree->size.extent.row; + if (previous_node->links[0].tree) { + node->error_cost += + ERROR_COST_PER_SKIPPED_CHAR * tree->padding.chars + + ERROR_COST_PER_SKIPPED_LINE * tree->padding.extent.row; + } } - } else { - node->error_count++; } } else { node->position = length_zero(); - node->error_count = 0; node->error_cost = 0; } @@ -158,15 +149,17 @@ static StackNode *stack_node_new(StackNode *previous_node, Tree *tree, bool is_p } static bool stack__tree_is_equivalent(const Tree *left, const Tree *right) { - return left == right || ( - left && - right && - left->child_count == 0 && right->child_count == 0 && - left->symbol == right->symbol && - left->padding.bytes == right->padding.bytes && - left->size.bytes == right->size.bytes && - left->extra == right->extra && - ts_tree_external_token_state_eq(left, right)); + return + left == right || + (left && + right && + left->symbol == right->symbol && + ((left->error_cost > 0 && right->error_cost > 0) || + (left->child_count == 0 && right->child_count == 0 && + left->padding.bytes == right->padding.bytes && + left->size.bytes == right->size.bytes && + left->extra == right->extra && + ts_tree_external_token_state_eq(left, right)))); } static void stack_node_add_link(StackNode *self, StackLink link) { @@ -174,7 +167,8 @@ static void stack_node_add_link(StackNode *self, StackLink link) { StackLink existing_link = self->links[i]; if (stack__tree_is_equivalent(existing_link.tree, link.tree)) { if (existing_link.node == link.node) return; - if (existing_link.node->state == link.node->state) { + if (existing_link.node->state == link.node->state && + existing_link.node->position.bytes == link.node->position.bytes) { for (int j = 0; j < link.node->link_count; j++) { stack_node_add_link(existing_link.node, link.node->links[j]); } @@ -195,17 +189,19 @@ static void stack_head_delete(StackHead *self, StackNodeArray *pool) { if (self->last_external_token) { ts_tree_release(self->last_external_token); } + if (self->summary) { + array_delete(self->summary); + ts_free(self->summary); + } stack_node_release(self->node, pool); } } -static StackVersion ts_stack__add_version(Stack *self, StackNode *node, - uint32_t push_count, uint32_t depth, - Tree *last_external_token) { +static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, + StackNode *node, Tree *last_external_token) { StackHead head = { .node = node, - .depth = depth, - .push_count = push_count, + .push_count = self->heads.contents[original_version].push_count, .last_external_token = last_external_token, .is_halted = false, }; @@ -215,38 +211,35 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node, return (StackVersion)(self->heads.size - 1); } -static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees, - uint32_t push_count, uint32_t depth, - Tree *last_external_token) { +static void ts_stack__add_slice(Stack *self, StackVersion original_version, StackNode *node, + TreeArray *trees, Tree *last_external_token) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { - StackSlice slice = { *trees, version }; + StackSlice slice = {*trees, version}; array_insert(&self->slices, i + 1, slice); return; } } - StackVersion version = ts_stack__add_version(self, node, push_count, depth, last_external_token); + StackVersion version = ts_stack__add_version(self, original_version, node, last_external_token); StackSlice slice = { *trees, version }; array_push(&self->slices, slice); } inline StackPopResult stack__iter(Stack *self, StackVersion version, - StackIterateInternalCallback callback, void *payload) { + StackIterateInternalCallback callback, void *payload, + bool include_trees) { array_clear(&self->slices); array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - uint32_t starting_push_count = head->push_count; Tree *last_external_token = head->last_external_token; Iterator iterator = { .node = head->node, .trees = array_new(), .tree_count = 0, .is_pending = true, - .push_count = 0, - .depth = head->depth, }; array_push(&self->iterators, iterator); @@ -266,10 +259,9 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version, ts_tree_array_reverse(&trees); ts_stack__add_slice( self, + version, node, &trees, - starting_push_count + iterator->push_count, - iterator->depth, last_external_token ); } @@ -298,28 +290,27 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version, } next_iterator->node = link.node; - next_iterator->push_count += link.push_count; - if (link.depth > 0) { - next_iterator->depth = link.depth; - } if (link.tree) { + if (include_trees) { + array_push(&next_iterator->trees, link.tree); + ts_tree_retain(link.tree); + } + if (!link.tree->extra) { next_iterator->tree_count++; - next_iterator->depth--; if (!link.is_pending) { next_iterator->is_pending = false; } } - array_push(&next_iterator->trees, link.tree); - ts_tree_retain(link.tree); } else { + next_iterator->tree_count++; next_iterator->is_pending = false; } } } } - return (StackPopResult){ false, self->slices }; + return (StackPopResult){self->slices}; } Stack *ts_stack_new() { @@ -375,8 +366,7 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) { return array_get(&self->heads, version)->push_count; } -void ts_stack_decrease_push_count(Stack *self, StackVersion version, - unsigned decrement) { +void ts_stack_decrease_push_count(Stack *self, StackVersion version, unsigned decrement) { array_get(&self->heads, version)->push_count -= decrement; } @@ -391,33 +381,18 @@ void ts_stack_set_last_external_token(Stack *self, StackVersion version, Tree *t head->last_external_token = token; } -ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) { +unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { StackHead *head = array_get(&self->heads, version); - return (ErrorStatus){ - .cost = head->node->error_cost, - .count = head->node->error_count, - .push_count = head->push_count, - .depth = head->depth, - }; + return head->node->error_cost; } -unsigned ts_stack_error_count(const Stack *self, StackVersion version) { - StackNode *node = array_get(&self->heads, version)->node; - return node->error_count; -} - -void ts_stack_push(Stack *self, StackVersion version, Tree *tree, - bool is_pending, TSStateId state) { +void ts_stack_push(Stack *self, StackVersion version, Tree *tree, bool pending, TSStateId state) { StackHead *head = array_get(&self->heads, version); - StackNode *new_node = stack_node_new(head->node, tree, is_pending, state, &self->node_pool); + StackNode *new_node = stack_node_new(head->node, tree, pending, state, &self->node_pool); if (state == ERROR_STATE) { - new_node->links[0].push_count = head->push_count; - new_node->links[0].depth = head->depth; head->push_count = 0; - head->depth = 0; - } else { + } else if (!tree->extra) { head->push_count++; - if (!tree->extra) head->depth++; } stack_node_release(head->node, &self->node_pool); head->node = new_node; @@ -431,55 +406,20 @@ inline StackIterateAction iterate_callback(void *payload, const Iterator *iterat StackPopResult ts_stack_iterate(Stack *self, StackVersion version, StackIterateCallback callback, void *payload) { StackIterateSession session = {payload, callback}; - return stack__iter(self, version, iterate_callback, &session); + return stack__iter(self, version, iterate_callback, &session, true); } inline StackIterateAction pop_count_callback(void *payload, const Iterator *iterator) { - StackPopSession *pop_session = (StackPopSession *)payload; - - if (iterator->tree_count == pop_session->goal_tree_count) { - pop_session->found_valid_path = true; + unsigned *goal_tree_count = payload; + if (iterator->tree_count == *goal_tree_count) { return StackIteratePop | StackIterateStop; + } else { + return StackIterateNone; } - - if (iterator->node->state == ERROR_STATE) { - if (pop_session->found_valid_path || pop_session->found_error) { - return StackIterateStop; - } else { - pop_session->found_error = true; - return StackIteratePop | StackIterateStop; - } - } - return StackIterateNone; } -StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, - uint32_t count) { - StackPopSession session = { - .goal_tree_count = count, - .found_error = false, - .found_valid_path = false, - }; - - StackPopResult pop = stack__iter(self, version, pop_count_callback, &session); - - if (session.found_error) { - if (session.found_valid_path) { - StackSlice error_slice = pop.slices.contents[0]; - ts_tree_array_delete(&error_slice.trees); - array_erase(&pop.slices, 0); - if (array_front(&pop.slices)->version != error_slice.version) { - ts_stack_remove_version(self, error_slice.version); - for (StackVersion i = 0; i < pop.slices.size; i++) { - pop.slices.contents[i].version--; - } - } - } else { - pop.stopped_at_error = true; - } - } - - return pop; +StackPopResult ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { + return stack__iter(self, version, pop_count_callback, &count, true); } inline StackIterateAction pop_pending_callback(void *payload, const Iterator *iterator) { @@ -495,7 +435,7 @@ inline StackIterateAction pop_pending_callback(void *payload, const Iterator *it } StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) { - StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL); + StackPopResult pop = stack__iter(self, version, pop_pending_callback, NULL, true); if (pop.slices.size > 0) { ts_stack_renumber_version(self, pop.slices.contents[0].version, version); pop.slices.contents[0].version = version; @@ -503,12 +443,78 @@ StackPopResult ts_stack_pop_pending(Stack *self, StackVersion version) { return pop; } +inline StackIterateAction pop_error_callback(void *payload, const Iterator *iterator) { + if (iterator->trees.size > 0) { + bool *found_error = payload; + if (!*found_error && iterator->trees.contents[0]->symbol == ts_builtin_sym_error) { + *found_error = true; + return StackIteratePop | StackIterateStop; + } else { + return StackIterateStop; + } + } else { + return StackIterateNone; + } +} + +StackPopResult ts_stack_pop_error(Stack *self, StackVersion version) { + StackNode *node = array_get(&self->heads, version)->node; + for (unsigned i = 0; i < node->link_count; i++) { + if (node->links[i].tree && node->links[i].tree->symbol == ts_builtin_sym_error) { + bool found_error = false; + return stack__iter(self, version, pop_error_callback, &found_error, true); + } + } + return (StackPopResult){.slices = array_new()}; +} + inline StackIterateAction pop_all_callback(void *payload, const Iterator *iterator) { return iterator->node->link_count == 0 ? StackIteratePop : StackIterateNone; } StackPopResult ts_stack_pop_all(Stack *self, StackVersion version) { - return stack__iter(self, version, pop_all_callback, NULL); + return stack__iter(self, version, pop_all_callback, NULL, true); +} + +typedef struct { + StackSummary *summary; + unsigned max_depth; +} SummarizeStackSession; + +inline StackIterateAction summarize_stack_callback(void *payload, const Iterator *iterator) { + SummarizeStackSession *session = payload; + TSStateId state = iterator->node->state; + unsigned depth = iterator->tree_count; + if (depth > session->max_depth) return StackIterateStop; + for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { + StackSummaryEntry entry = session->summary->contents[i]; + if (entry.depth < depth) break; + if (entry.depth == depth && entry.state == state) return StackIterateNone; + } + array_push(session->summary, ((StackSummaryEntry){ + .position = iterator->node->position, + .depth = depth, + .state = state, + })); + return StackIterateNone; +} + +void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { + SummarizeStackSession session = { + .summary = ts_malloc(sizeof(StackSummary)), + .max_depth = max_depth + }; + array_init(session.summary); + stack__iter(self, version, summarize_stack_callback, &session, false); + self->heads.contents[version].summary = session.summary; +} + +StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { + return array_get(&self->heads, version)->summary; +} + +unsigned ts_stack_depth_since_error(Stack *self, StackVersion version) { + return array_get(&self->heads, version)->node->depth; } void ts_stack_remove_version(Stack *self, StackVersion version) { @@ -536,6 +542,7 @@ StackVersion ts_stack_copy_version(Stack *self, StackVersion version) { StackHead *head = array_back(&self->heads); stack_node_retain(head->node); if (head->last_external_token) ts_tree_retain(head->last_external_token); + head->summary = NULL; return self->heads.size - 1; } @@ -552,11 +559,11 @@ bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version StackHead *head1 = &self->heads.contents[version1]; StackHead *head2 = &self->heads.contents[version2]; return + !head1->is_halted && !head2->is_halted && head1->node->state == head2->node->state && head1->node->position.chars == head2->node->position.chars && - ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token) && - ((head1->node->error_count == 0 && head2->node->error_count == 0) || - (head1->depth == head2->depth)); + head1->node->depth == head2->node->depth && + ts_tree_external_token_state_eq(head1->last_external_token, head2->last_external_token); } void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion version2) { @@ -565,8 +572,6 @@ void ts_stack_force_merge(Stack *self, StackVersion version1, StackVersion versi for (uint32_t i = 0; i < head2->node->link_count; i++) { stack_node_add_link(head1->node, head2->node->links[i]); } - if (head2->push_count > head1->push_count) head1->push_count = head2->push_count; - if (head2->depth > head1->depth) head1->depth = head2->depth; ts_stack_remove_version(self, version2); } @@ -587,8 +592,6 @@ void ts_stack_clear(Stack *self) { array_push(&self->heads, ((StackHead){ .node = self->base_node, .last_external_token = NULL, - .depth = 0, - .push_count = 0, .is_halted = false, })); } @@ -612,8 +615,8 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf( f, "node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, " - "labeltooltip=\"push_count: %u\ndepth: %u", - i, head->node, i, head->push_count, head->depth); + "labeltooltip=\"push_count: %u\ndepth: %u", i, head->node, i, head->push_count, head->node->depth + ); if (head->last_external_token) { TSExternalTokenState *state = &head->last_external_token->external_token_state; @@ -654,10 +657,11 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { else fprintf(f, "label=\"%d\"", node->state); - fprintf(f, - " tooltip=\"position: %u,%u\nerror_count: %u\nerror_cost: %u\"];\n", - node->position.extent.row, node->position.extent.column, node->error_count, - node->error_cost); + fprintf( + f, + " tooltip=\"position: %u,%u\nerror_cost: %u\"];\n", + node->position.extent.row, node->position.extent.column, node->error_cost + ); for (int j = 0; j < node->link_count; j++) { StackLink link = node->links[j]; @@ -668,7 +672,7 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) { fprintf(f, "fontcolor=gray "); if (!link.tree) { - fprintf(f, "color=red, tooltip=\"push_count: %u, depth: %u\"", link.push_count, link.depth); + fprintf(f, "color=red"); } else if (link.tree->symbol == ts_builtin_sym_error) { fprintf(f, "label=\"ERROR\""); } else { diff --git a/src/runtime/stack.h b/src/runtime/stack.h index 1eeb5cd0..6bb0b40b 100644 --- a/src/runtime/stack.h +++ b/src/runtime/stack.h @@ -23,7 +23,6 @@ typedef struct { typedef Array(StackSlice) StackSliceArray; typedef struct { - bool stopped_at_error; StackSliceArray slices; } StackPopResult; @@ -34,6 +33,14 @@ enum { StackIteratePop = 2, }; +typedef struct { + Length position; + unsigned depth; + TSStateId state; +} StackSummaryEntry; + +typedef Array(StackSummaryEntry) StackSummary; + typedef StackIterateAction (*StackIterateCallback)(void *, TSStateId state, const TreeArray *trees, uint32_t tree_count); @@ -89,11 +96,19 @@ StackPopResult ts_stack_pop_count(Stack *, StackVersion, uint32_t count); StackPopResult ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *); +StackPopResult ts_stack_pop_error(Stack *, StackVersion); + StackPopResult ts_stack_pop_pending(Stack *, StackVersion); StackPopResult ts_stack_pop_all(Stack *, StackVersion); -ErrorStatus ts_stack_error_status(const Stack *, StackVersion); +unsigned ts_stack_depth_since_error(Stack *, StackVersion); + +void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth); + +StackSummary *ts_stack_get_summary(Stack *, StackVersion); + +unsigned ts_stack_error_cost(const Stack *, StackVersion version); bool ts_stack_merge(Stack *, StackVersion, StackVersion); diff --git a/test/benchmarks.cc b/test/benchmarks.cc index b070ec5e..616d2de6 100644 --- a/test/benchmarks.cc +++ b/test/benchmarks.cc @@ -83,7 +83,7 @@ int main(int argc, char *arg[]) { assert(!ts_node_has_error(ts_document_root_node(document))); size_t speed = static_cast(example.input.size()) / duration; printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); - non_error_speeds.push_back(speed); + if (speed != 0) non_error_speeds.push_back(speed); } for (auto &other_language_name : language_names) { @@ -102,7 +102,7 @@ int main(int argc, char *arg[]) { unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC; size_t speed = static_cast(example.input.size()) / duration; printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed); - error_speeds.push_back(speed); + if (speed != 0) error_speeds.push_back(speed); } } diff --git a/test/compiler/build_tables/lex_table_builder_test.cc b/test/compiler/build_tables/lex_table_builder_test.cc deleted file mode 100644 index e9f70aee..00000000 --- a/test/compiler/build_tables/lex_table_builder_test.cc +++ /dev/null @@ -1,122 +0,0 @@ -#include "test_helper.h" -#include "compiler/lexical_grammar.h" -#include "compiler/build_tables/lex_table_builder.h" - -using namespace build_tables; -using namespace rules; - -START_TEST - -describe("LexTableBuilder::detect_conflict", []() { - vector separators({ - CharacterSet({ ' ', '\t' }), - }); - - it("returns false for tokens that don't match the same string", [&]() { - auto builder = LexTableBuilder::create(LexicalGrammar{ - { - LexicalVariable{ - "token_0", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ 'a' }), - CharacterSet({ 'b' }), - CharacterSet({ 'c' }), - }), - false - }, - LexicalVariable{ - "token_1", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ 'b' }), - CharacterSet({ 'c' }), - CharacterSet({ 'd' }), - }), - false - }, - }, - separators - }); - - AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse()); - AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse()); - }); - - it("returns true when the left token can match a string that the right token matches, " - "plus a separator character", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "token_0", - VariableTypeNamed, - Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/ - false - }, - LexicalVariable{ - "token_1", - VariableTypeNamed, - Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc' - true - }, - }, - separators - }; - - auto builder = LexTableBuilder::create(grammar); - AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue()); - AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse()); - - grammar.variables[1].is_string = false; - AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue()); - AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse()); - }); - - it("returns true when the left token matches a string that the right token matches, " - "plus the first character of some token that can follow the right token", [&]() { - LexicalGrammar grammar{ - { - LexicalVariable{ - "token_0", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ '>' }), - CharacterSet({ '=' }), - }), - true - }, - LexicalVariable{ - "token_1", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ '>' }), - }), - true - }, - LexicalVariable{ - "token_2", - VariableTypeNamed, - Rule::seq({ - CharacterSet({ '=' }), - }), - true - }, - }, - separators - }; - - // If no tokens can follow token_1, then there's no conflict - auto builder = LexTableBuilder::create(grammar); - vector> following_tokens_by_token_index(3); - AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse()); - AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse()); - - // If token_2 can follow token_1, then token_0 conflicts with token_1 - builder = LexTableBuilder::create(grammar); - following_tokens_by_token_index[1].insert(2); - AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue()); - AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse()); - }); -}); - -END_TEST diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index f9280ce3..7839337e 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -9,9 +9,11 @@ int x // no semicolon int a; #ifdef __cplusplus -extern "C" +extern "C" { #endif +int c() { return 5; } + int b; #ifdef __cplusplus @@ -23,20 +25,23 @@ int c; --- (translation_unit - (preproc_ifdef (identifier) + (preproc_ifdef + (identifier) (ERROR (type_identifier) (identifier)) (comment)) - (declaration (type_identifier) (identifier)) - - (preproc_ifdef (identifier) - (ERROR (string_literal))) - - (declaration (type_identifier) (identifier)) - - (preproc_ifdef (identifier) - (ERROR)) - + (preproc_ifdef + (identifier) + (linkage_specification + (string_literal) + (declaration_list + (ERROR) + (function_definition + (type_identifier) + (function_declarator (identifier) (parameter_list)) + (compound_statement (return_statement (number_literal)))) + (declaration (type_identifier) (identifier)) + (ERROR (identifier))))) (declaration (type_identifier) (identifier))) ======================================== @@ -76,8 +81,8 @@ int main() { (declaration (type_identifier) (init_declarator (identifier) (parenthesized_expression - (ERROR (number_literal)) - (number_literal))))))) + (number_literal) + (ERROR (number_literal)))))))) ======================================== Errors in declarations @@ -124,13 +129,13 @@ int b() { (compound_statement (declaration (type_identifier) + (ERROR (identifier) (identifier)) (init_declarator (identifier) - (ERROR (identifier) (identifier)) (number_literal))) (declaration (type_identifier) + (ERROR (identifier) (identifier)) (init_declarator (identifier) - (ERROR (identifier) (identifier)) (number_literal)))))) diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 45eb1c33..7f756028 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -12,12 +12,13 @@ e f; (program (if_statement (parenthesized_expression - (ERROR (identifier)) - (identifier)) + (identifier) + (ERROR (identifier))) (statement_block (ERROR (identifier)) (expression_statement (identifier)))) - (expression_statement (ERROR (identifier)) (identifier))) + (ERROR (identifier)) + (expression_statement (identifier))) ======================================================= multiple invalid tokens right after the viable prefix @@ -33,16 +34,13 @@ h i j k; (program (if_statement (parenthesized_expression - (ERROR (identifier)) (identifier) - (ERROR (identifier))) + (ERROR (identifier) (identifier))) (statement_block - (expression_statement - (identifier) - (ERROR (jsx_attribute (property_identifier)) (jsx_attribute (property_identifier)) (identifier))))) - (expression_statement - (identifier) - (ERROR (jsx_attribute (property_identifier)) (jsx_attribute (property_identifier)) (identifier)))) + (ERROR (identifier)) + (expression_statement (identifier) (ERROR (identifier) (identifier))))) + (ERROR (identifier)) + (expression_statement (identifier) (ERROR (identifier) (identifier)))) =================================================== one invalid subtree right after the viable prefix @@ -136,3 +134,17 @@ var x = !!! (function (identifier) (formal_parameters) (statement_block)) (function (identifier) (formal_parameters) (statement_block)) (ERROR (identifier))) + +========================================================= +Errors inside of a template string substitution +========================================================= + +const a = `b c ${d +} f g` + +--- + +(program + (lexical_declaration + (variable_declarator + (identifier) + (template_string (template_substitution (identifier) (ERROR)))))) diff --git a/test/helpers/record_alloc.cc b/test/helpers/record_alloc.cc index a3f0b49f..43e11abe 100644 --- a/test/helpers/record_alloc.cc +++ b/test/helpers/record_alloc.cc @@ -1,9 +1,9 @@ #include #include -#include +#include using std::map; -using std::set; +using std::vector; static bool _enabled = false; static size_t _allocation_count = 0; @@ -21,10 +21,10 @@ void stop() { _enabled = false; } -set outstanding_allocation_indices() { - set result; +vector outstanding_allocation_indices() { + vector result; for (const auto &entry : _outstanding_allocations) { - result.insert(entry.second); + result.push_back(entry.second); } return result; } @@ -38,9 +38,7 @@ size_t allocation_count() { extern "C" { static void *record_allocation(void *result) { - if (!_enabled) - return result; - + if (!_enabled) return result; _outstanding_allocations[result] = _allocation_count; _allocation_count++; return result; diff --git a/test/helpers/record_alloc.h b/test/helpers/record_alloc.h index 50cd62ad..1f5968ac 100644 --- a/test/helpers/record_alloc.h +++ b/test/helpers/record_alloc.h @@ -1,14 +1,14 @@ #ifndef HELPERS_RECORD_ALLOC_H_ #define HELPERS_RECORD_ALLOC_H_ -#include +#include namespace record_alloc { void start(); void stop(); void fail_at_allocation_index(size_t failure_index); -std::set outstanding_allocation_indices(); +std::vector outstanding_allocation_indices(); size_t allocation_count(); } // namespace record_alloc diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 2f60c0f0..420cf092 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -91,15 +91,15 @@ describe("Parser", [&]() { TSNode error = ts_node_named_child(ts_node_child(root, 0), 1); AssertThat(ts_node_type(error, document), Equals("ERROR")); - AssertThat(get_node_text(error), Equals(", @@@@@")); + AssertThat(get_node_text(error), Equals("@@@@@,")); AssertThat(ts_node_child_count(error), Equals(2)); - TSNode comma = ts_node_child(error, 0); - AssertThat(get_node_text(comma), Equals(",")); - - TSNode garbage = ts_node_child(error, 1); + TSNode garbage = ts_node_child(error, 0); AssertThat(get_node_text(garbage), Equals("@@@@@")); + TSNode comma = ts_node_child(error, 1); + AssertThat(get_node_text(comma), Equals(",")); + TSNode node_after_error = ts_node_next_named_sibling(error); AssertThat(ts_node_type(node_after_error, document), Equals("true")); AssertThat(get_node_text(node_after_error), Equals("true")); @@ -116,16 +116,17 @@ describe("Parser", [&]() { TSNode error = ts_node_named_child(ts_node_child(root, 0), 1); AssertThat(ts_node_type(error, document), Equals("ERROR")); + AssertThat(get_node_text(error), Equals("faaaaalse,")); AssertThat(ts_node_child_count(error), Equals(2)); - TSNode comma = ts_node_child(error, 0); - AssertThat(ts_node_type(comma, document), Equals(",")); - AssertThat(get_node_text(comma), Equals(",")); - - TSNode garbage = ts_node_child(error, 1); + TSNode garbage = ts_node_child(error, 0); AssertThat(ts_node_type(garbage, document), Equals("ERROR")); AssertThat(get_node_text(garbage), Equals("faaaaalse")); + TSNode comma = ts_node_child(error, 1); + AssertThat(ts_node_type(comma, document), Equals(",")); + AssertThat(get_node_text(comma), Equals(",")); + TSNode last = ts_node_next_named_sibling(error); AssertThat(ts_node_type(last, document), Equals("true")); AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, "))); @@ -166,7 +167,7 @@ describe("Parser", [&]() { ts_document_set_language(document, load_real_language("javascript")); set_text("a; ' this string never ends"); assert_root_node( - "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))"); + "(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); }); }); @@ -198,7 +199,7 @@ describe("Parser", [&]() { free(string); - assert_root_node("(ERROR (UNEXPECTED INVALID))"); + assert_root_node("(program (ERROR (UNEXPECTED INVALID)))"); }); }); diff --git a/test/runtime/stack_test.cc b/test/runtime/stack_test.cc index e8a258ed..cb5abf2d 100644 --- a/test/runtime/stack_test.cc +++ b/test/runtime/stack_test.cc @@ -124,12 +124,6 @@ describe("Stack", [&]() { {1, 3}, }))); }); - - it("increments the version's push count", [&]() { - AssertThat(ts_stack_push_count(stack, 0), Equals(0)); - ts_stack_push(stack, 0, trees[0], false, stateA); - AssertThat(ts_stack_push_count(stack, 0), Equals(1)); - }); }); describe("merge()", [&]() { @@ -221,7 +215,6 @@ describe("Stack", [&]() { // ↑ // └─* StackPopResult pop = ts_stack_pop_count(stack, 0, 2); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); AssertThat(ts_stack_version_count(stack), Equals(2)); @@ -240,7 +233,6 @@ describe("Stack", [&]() { // ↑ // └─* StackPopResult pop = ts_stack_pop_count(stack, 0, 2); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); StackSlice slice = pop.slices.contents[0]; @@ -250,40 +242,6 @@ describe("Stack", [&]() { free_slice_array(&pop.slices); }); - it("stops popping entries early if it reaches an error tree", [&]() { - // . <──0── A <──1── B <──2── C <──3── ERROR <──4── D* - ts_stack_push(stack, 0, trees[3], false, ERROR_STATE); - ts_stack_push(stack, 0, trees[4], false, stateD); - - // . <──0── A <──1── B <──2── C <──3── ERROR <──4── D* - // ↑ - // └─* - StackPopResult pop = ts_stack_pop_count(stack, 0, 3); - AssertThat(pop.stopped_at_error, Equals(true)); - - AssertThat(ts_stack_version_count(stack), Equals(2)); - AssertThat(ts_stack_top_state(stack, 1), Equals(ERROR_STATE)); - - AssertThat(pop.slices.size, Equals(1)); - StackSlice slice = pop.slices.contents[0]; - AssertThat(slice.version, Equals(1)); - AssertThat(slice.trees, Equals(vector({ trees[4] }))); - - free_slice_array(&pop.slices); - }); - - it("preserves the push count of the popped version", [&]() { - // . <──0── A <──1── B <──2── C* - // ↑ - // └─* - StackPopResult pop = ts_stack_pop_count(stack, 0, 2); - - AssertThat(ts_stack_push_count(stack, 0), Equals(3)); - AssertThat(ts_stack_push_count(stack, 1), Equals(3)); - - free_slice_array(&pop.slices); - }); - describe("when the version has been merged", [&]() { before_each([&]() { // . <──0── A <──1── B <──2── C <──3── D <──10── I* @@ -475,7 +433,6 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[1], true, stateB); StackPopResult pop = ts_stack_pop_pending(stack, 0); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ @@ -496,7 +453,6 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[3], false, stateB); StackPopResult pop = ts_stack_pop_pending(stack, 0); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(1)); AssertThat(pop.slices.contents[0].trees, Equals(vector({ trees[1], trees[2], trees[3] }))); @@ -513,7 +469,6 @@ describe("Stack", [&]() { ts_stack_push(stack, 0, trees[1], false, stateB); StackPopResult pop = ts_stack_pop_pending(stack, 0); - AssertThat(pop.stopped_at_error, Equals(false)); AssertThat(pop.slices.size, Equals(0)); AssertThat(get_stack_entries(stack, 0), Equals(vector({ diff --git a/tests.gyp b/tests.gyp index af11b878..02012a0a 100644 --- a/tests.gyp +++ b/tests.gyp @@ -39,7 +39,6 @@ 'sources': [ 'test/compiler/build_tables/lex_conflict_manager_test.cc', 'test/compiler/build_tables/lex_item_test.cc', - 'test/compiler/build_tables/lex_table_builder_test.cc', 'test/compiler/build_tables/parse_item_set_builder_test.cc', 'test/compiler/build_tables/rule_can_be_blank_test.cc', 'test/compiler/prepare_grammar/expand_repeats_test.cc',