From b76574e01ca9baa3249d327cce8fd28d68900783 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 6 Sep 2016 10:22:16 -0700 Subject: [PATCH] Handle ambiguities between extra and non-extra tokens using normal GLR splitting --- include/tree_sitter/parser.h | 8 --- .../parse_conflict_manager_spec.cc | 12 ---- .../error_corpus/javascript_errors.txt | 34 +++++++++-- .../build_tables/build_parse_table.cc | 26 +------- src/compiler/generate_code/c_code.cc | 4 +- src/compiler/parse_table.cc | 16 +++-- src/compiler/parse_table.h | 2 +- src/runtime/parser.c | 61 ++++++++----------- 8 files changed, 63 insertions(+), 100 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 5d89efc2..c1a6dea9 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -136,14 +136,6 @@ struct TSLanguage { { .type = TSParseActionTypeShift, .extra = true } \ } -#define REDUCE_EXTRA(symbol_val) \ - { \ - { \ - .type = TSParseActionTypeReduce, .symbol = symbol_val, .child_count = 1, \ - .extra = true, \ - } \ - } - #define REDUCE(symbol_val, child_count_val) \ { \ { \ diff --git a/spec/compiler/build_tables/parse_conflict_manager_spec.cc b/spec/compiler/build_tables/parse_conflict_manager_spec.cc index 714e8b1c..c2dfe8fc 100644 --- a/spec/compiler/build_tables/parse_conflict_manager_spec.cc +++ b/spec/compiler/build_tables/parse_conflict_manager_spec.cc @@ -66,18 +66,6 @@ describe("ParseConflictManager", []() { }); }); - describe("reduce-extra actions", [&]() { - it("favors shift-extra actions over reduce-extra actions", [&]() { - result = conflict_manager->resolve(ParseAction::ShiftExtra(), ParseAction::ReduceExtra(sym1)); - AssertThat(result.first, IsTrue()); - AssertThat(result.second, Equals(ConflictTypeNone)); - - result = conflict_manager->resolve(ParseAction::ReduceExtra(sym1), ParseAction::ShiftExtra()); - AssertThat(result.first, IsFalse()); - AssertThat(result.second, Equals(ConflictTypeNone)); - }); - }); - describe("shift/reduce conflicts", [&]() { describe("when the shift has higher precedence", [&]() { ParseAction shift = ParseAction::Shift(2, {3, 4}); diff --git a/spec/fixtures/error_corpus/javascript_errors.txt b/spec/fixtures/error_corpus/javascript_errors.txt index b8ddc33a..0c5e976e 100644 --- a/spec/fixtures/error_corpus/javascript_errors.txt +++ b/spec/fixtures/error_corpus/javascript_errors.txt @@ -16,7 +16,8 @@ e f; (statement_block (ERROR (identifier)) (expression_statement (identifier)))) - (expression_statement (ERROR (identifier)) (identifier))) + (ERROR (identifier)) + (expression_statement (identifier))) ======================================================= multiple invalid tokens right after the viable prefix @@ -52,16 +53,16 @@ if ({a: 'b'} {c: 'd'}) { (program (if_statement - (ERROR (object (pair (identifier) (string)))) (object (pair (identifier) (string))) + (ERROR (object (pair (identifier) (string)))) (statement_block + (ERROR (function + (formal_parameters (identifier)) + (statement_block (expression_statement (identifier))))) (expression_statement (function (formal_parameters (identifier)) - (statement_block (expression_statement (identifier)))) - (ERROR (function - (formal_parameters (identifier)) - (statement_block (expression_statement (identifier))))))))) + (statement_block (expression_statement (identifier)))))))) =================================================== one invalid token at the end of the file @@ -76,3 +77,24 @@ a.b = (trailing_expression_statement (member_access (identifier) (identifier))) (ERROR)) + +=================================================== +Multi-line chained expressions in var declarations +=================================================== + +const one = two + .three(four) + .five() + +--- + +(program + (var_declaration (var_assignment + (identifier) + (function_call + (member_access + (function_call + (member_access (identifier) (identifier)) + (arguments (identifier))) + (identifier)) + (arguments))))) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index def80f55..16e7101d 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -81,9 +81,6 @@ class ParseTableBuilder { process_part_state_queue(); allow_any_conflict = false; - for (ParseStateId state = 0; state < parse_table.states.size(); state++) - add_reduce_extra_actions(state); - mark_fragile_actions(); remove_duplicate_parse_states(); @@ -198,31 +195,10 @@ class ParseTableBuilder { ParseAction action = ParseAction::ShiftExtra(); ParseState &state = parse_table.states[state_id]; for (const Symbol &extra_symbol : grammar.extra_tokens) - if (!state.entries.count(extra_symbol) || - (allow_any_conflict && - state.entries[extra_symbol].actions.back().type == - ParseActionTypeReduce)) + if (!state.entries.count(extra_symbol) || state.has_shift_action() || allow_any_conflict) parse_table.add_action(state_id, extra_symbol, action); } - void add_reduce_extra_actions(ParseStateId state_id) { - const ParseState &state = parse_table.states[state_id]; - - for (const Symbol &extra_symbol : grammar.extra_tokens) { - const auto &entry_for_symbol = state.entries.find(extra_symbol); - if (entry_for_symbol == state.entries.end()) - continue; - - for (const ParseAction &action : entry_for_symbol->second.actions) - if (action.type == ParseActionTypeShift && !action.extra) { - size_t dest_state_id = action.state_index; - ParseAction reduce_extra = ParseAction::ReduceExtra(extra_symbol); - for (const auto &pair : state.entries) - add_action(dest_state_id, pair.first, reduce_extra, null_item_set); - } - } - } - void mark_fragile_actions() { for (ParseState &state : parse_table.states) { set symbols_with_multiple_actions; diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index 7ee66d5a..714d3045 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -340,9 +340,7 @@ class CCodeGenerator { } break; case ParseActionTypeReduce: - if (action.extra) { - add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")"); - } else if (action.fragile) { + if (action.fragile) { add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " + to_string(action.consumed_symbol_count) + ")"); } else { diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 6efdac28..808c4ce2 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -65,15 +65,6 @@ ParseAction ParseAction::ShiftExtra() { return action; } -ParseAction ParseAction::ReduceExtra(Symbol symbol) { - ParseAction action; - action.type = ParseActionTypeReduce; - action.extra = true; - action.symbol = symbol; - action.consumed_symbol_count = 1; - return action; -} - ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence, rules::Associativity associativity, @@ -133,6 +124,13 @@ bool ParseTableEntry::operator==(const ParseTableEntry &other) const { ParseState::ParseState() : lex_state_id(-1) {} +bool ParseState::has_shift_action() const { + for (const auto &pair : entries) + if (pair.second.actions.size() > 0 && pair.second.actions.back().type == ParseActionTypeShift) + return true; + return false; +} + set ParseState::expected_inputs() const { set result; for (auto &entry : entries) diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 4ffcb273..1a00b273 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -38,7 +38,6 @@ class ParseAction { int precedence, rules::Associativity, const Production &); static ParseAction ShiftExtra(); - static ParseAction ReduceExtra(rules::Symbol symbol); bool operator==(const ParseAction &) const; bool operator<(const ParseAction &) const; @@ -74,6 +73,7 @@ class ParseState { bool operator==(const ParseState &) const; bool merge(const ParseState &); void each_advance_action(std::function); + bool has_shift_action() const; std::map entries; LexStateId lex_state_id; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 2df15c77..2eba4663 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -472,7 +472,7 @@ static bool parser__switch_children(Parser *self, TSTree *tree, } static Reduction parser__reduce(Parser *self, StackVersion version, - TSSymbol symbol, unsigned count, bool extra, + TSSymbol symbol, unsigned count, bool fragile, bool allow_skipping) { size_t initial_version_count = ts_stack_version_count(self->stack); StackPopResult pop = ts_stack_pop_count(self->stack, version, count); @@ -531,41 +531,34 @@ static Reduction parser__reduce(Parser *self, StackVersion version, parent->parse_state = state; } - TSStateId new_state; - if (extra) { - parent->extra = true; - new_state = state; - } else { - const TSParseAction *action = - ts_language_last_action(language, state, symbol); - assert(action->type == TSParseActionTypeShift || - action->type == TSParseActionTypeRecover); - new_state = action->to_state; + const TSParseAction *action = + ts_language_last_action(language, state, symbol); + assert(action->type == TSParseActionTypeShift || + action->type == TSParseActionTypeRecover); - if (action->type == TSParseActionTypeRecover && child_count > 1 && - allow_skipping) { - StackVersion other_version = - ts_stack_duplicate_version(self->stack, slice.version); - CHECK(other_version != STACK_VERSION_NONE); + if (action->type == TSParseActionTypeRecover && child_count > 1 && + allow_skipping) { + StackVersion other_version = + ts_stack_duplicate_version(self->stack, slice.version); + CHECK(other_version != STACK_VERSION_NONE); - CHECK(ts_stack_push(self->stack, other_version, parent, false, + CHECK(ts_stack_push(self->stack, other_version, parent, false, + TS_STATE_ERROR)); + for (size_t j = parent->child_count; j < slice.trees.size; j++) { + TSTree *tree = slice.trees.contents[j]; + CHECK(ts_stack_push(self->stack, other_version, tree, false, TS_STATE_ERROR)); - for (size_t j = parent->child_count; j < slice.trees.size; j++) { - TSTree *tree = slice.trees.contents[j]; - CHECK(ts_stack_push(self->stack, other_version, tree, false, - TS_STATE_ERROR)); - } - - ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); - if (parser__better_version_exists(self, version, error_status)) - ts_stack_remove_version(self->stack, other_version); } + + ErrorStatus error_status = ts_stack_error_status(self->stack, other_version); + if (parser__better_version_exists(self, version, error_status)) + ts_stack_remove_version(self->stack, other_version); } - CHECK(parser__push(self, slice.version, parent, new_state)); + CHECK(parser__push(self, slice.version, parent, action->to_state)); for (size_t j = parent->child_count; j < slice.trees.size; j++) { TSTree *tree = slice.trees.contents[j]; - CHECK(parser__push(self, slice.version, tree, new_state)); + CHECK(parser__push(self, slice.version, tree, action->to_state)); } } @@ -899,7 +892,7 @@ static PotentialReductionStatus parser__do_potential_reductions( for (size_t i = 0; i < self->reduce_actions.size; i++) { ReduceAction action = self->reduce_actions.contents[i]; Reduction reduction = parser__reduce(self, version, action.symbol, - action.count, false, true, false); + action.count, true, false); switch (reduction.status) { case ReduceFailed: goto error; @@ -1128,15 +1121,11 @@ static bool parser__advance(Parser *self, StackVersion version, if (reduction_stopped_at_error) continue; - if (action.extra) { - LOG("reduce_extra"); - } else { - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), - action.child_count); - } + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), + action.child_count); Reduction reduction = parser__reduce( - self, version, action.symbol, action.child_count, action.extra, + self, version, action.symbol, action.child_count, (i < table_entry.action_count - 1), true); switch (reduction.status) {