Remove the concept of fragile reductions
Fragile reductions were a vestige of when Tree-sitter did sentential-form-based incremental parsing (as opposed to simple state matching). That approach was elegant but, as far as I could tell, not compatible with GLR.
This commit is contained in:
parent
07fa3eb386
commit
52087de4f0
7 changed files with 20 additions and 66 deletions
|
|
@ -47,8 +47,7 @@ typedef struct {
|
|||
TSSymbol symbol;
|
||||
int16_t dynamic_precedence;
|
||||
uint8_t child_count;
|
||||
uint8_t alias_sequence_id : 7;
|
||||
bool fragile : 1;
|
||||
uint8_t alias_sequence_id;
|
||||
};
|
||||
} params;
|
||||
TSParseActionType type : 4;
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
ParseTable parse_table;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
unique_ptr<LexTableBuilder> lex_table_builder;
|
||||
set<ParseAction> fragile_reductions;
|
||||
unordered_map<Symbol, LookaheadSet> following_tokens_by_token;
|
||||
vector<LookaheadSet> coincident_tokens_by_token;
|
||||
|
||||
|
|
@ -103,7 +102,7 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
);
|
||||
|
||||
build_error_parse_state(error_state_id);
|
||||
mark_fragile_actions();
|
||||
remove_precedence_values();
|
||||
remove_duplicate_parse_states();
|
||||
|
||||
auto lex_table = lex_table_builder->build(&parse_table);
|
||||
|
|
@ -218,16 +217,11 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
entry.actions.push_back(action);
|
||||
} else {
|
||||
if (action.precedence > existing_action.precedence) {
|
||||
for (const ParseAction &old_action : entry.actions) {
|
||||
fragile_reductions.insert(old_action);
|
||||
}
|
||||
entry.actions.assign({action});
|
||||
lookaheads_with_conflicts.erase(lookahead);
|
||||
} else if (action.precedence == existing_action.precedence) {
|
||||
entry.actions.push_back(action);
|
||||
lookaheads_with_conflicts.insert(lookahead);
|
||||
} else {
|
||||
fragile_reductions.insert(action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -295,19 +289,14 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
return "";
|
||||
}
|
||||
|
||||
void mark_fragile_actions() {
|
||||
void remove_precedence_values() {
|
||||
for (ParseState &state : parse_table.states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
auto &actions = entry.second.actions;
|
||||
|
||||
for (ParseAction &action : actions) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
if (action_is_fragile(action)) {
|
||||
action.fragile = true;
|
||||
}
|
||||
action.precedence = 0;
|
||||
action.associativity = rules::AssociativityNone;
|
||||
}
|
||||
action.precedence = 0;
|
||||
action.associativity = rules::AssociativityNone;
|
||||
}
|
||||
|
||||
for (auto i = actions.begin(); i != actions.end();) {
|
||||
|
|
@ -327,27 +316,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
bool action_is_fragile(const ParseAction &action) {
|
||||
for (auto &fragile_action : fragile_reductions) {
|
||||
if (fragile_action.symbol == action.symbol &&
|
||||
fragile_action.consumed_symbol_count == action.consumed_symbol_count &&
|
||||
fragile_action.dynamic_precedence == action.dynamic_precedence) {
|
||||
if (fragile_action.precedence > action.precedence) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (fragile_action.precedence == action.precedence &&
|
||||
(fragile_action.associativity == action.associativity ||
|
||||
fragile_action.associativity == rules::AssociativityLeft ||
|
||||
action.associativity == rules::AssociativityRight)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
unordered_map<size_t, set<ParseStateId>> state_indices_by_signature;
|
||||
|
||||
|
|
@ -547,11 +515,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
(shift_precedence.min == reduction_precedence &&
|
||||
shift_precedence.max > reduction_precedence)) {
|
||||
entry.actions.assign({entry.actions.back()});
|
||||
for (const ParseAction &action : entry.actions) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
fragile_reductions.insert(action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the shift action has lower precedence, prefer the reduce actions.
|
||||
|
|
@ -595,11 +558,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
|
||||
if (!has_non_associative_reductions) {
|
||||
if (has_right_associative_reductions && !has_left_associative_reductions) {
|
||||
for (const ParseAction &action : entry.actions) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
fragile_reductions.insert(action);
|
||||
}
|
||||
}
|
||||
entry.actions.assign({entry.actions.back()});
|
||||
} else if (has_left_associative_reductions && !has_right_associative_reductions) {
|
||||
entry.actions.pop_back();
|
||||
|
|
@ -610,12 +568,6 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
|
||||
if (entry.actions.size() == 1) return "";
|
||||
|
||||
for (const ParseAction &action : entry.actions) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
fragile_reductions.insert(action);
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> actual_conflict;
|
||||
for (const ParseItem &item : conflicting_items) {
|
||||
Symbol symbol = item.lhs();
|
||||
|
|
|
|||
|
|
@ -635,10 +635,6 @@ class CCodeGenerator {
|
|||
add(", ");
|
||||
add(to_string(action.consumed_symbol_count));
|
||||
|
||||
if (action.fragile) {
|
||||
add(", .fragile = true");
|
||||
}
|
||||
|
||||
if (action.dynamic_precedence != 0) {
|
||||
add(", .dynamic_precedence = " + to_string(action.dynamic_precedence));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ ParseAction::ParseAction() :
|
|||
dynamic_precedence(0),
|
||||
associativity(rules::AssociativityNone),
|
||||
alias_sequence_id(0),
|
||||
fragile(false),
|
||||
extra(false),
|
||||
repetition(false) {}
|
||||
|
||||
|
|
@ -79,7 +78,6 @@ bool ParseAction::operator==(const ParseAction &other) const {
|
|||
associativity == other.associativity &&
|
||||
alias_sequence_id == other.alias_sequence_id &&
|
||||
extra == other.extra &&
|
||||
fragile == other.fragile &&
|
||||
repetition == other.repetition;
|
||||
}
|
||||
|
||||
|
|
@ -100,8 +98,6 @@ bool ParseAction::operator<(const ParseAction &other) const {
|
|||
if (other.associativity < associativity) return false;
|
||||
if (extra && !other.extra) return true;
|
||||
if (other.extra && !extra) return false;
|
||||
if (fragile && !other.fragile) return true;
|
||||
if (other.fragile && !fragile) return false;
|
||||
if (repetition && !other.repetition) return true;
|
||||
if (other.repetition && !repetition) return false;
|
||||
return alias_sequence_id < other.alias_sequence_id;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ struct ParseAction {
|
|||
int dynamic_precedence;
|
||||
rules::Associativity associativity;
|
||||
unsigned alias_sequence_id;
|
||||
bool fragile;
|
||||
bool extra;
|
||||
bool repetition;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1119,11 +1119,12 @@ static void parser__advance(Parser *self, StackVersion version, ReusableNode *re
|
|||
}
|
||||
|
||||
case TSParseActionTypeReduce: {
|
||||
bool is_fragile = table_entry.action_count > 1;
|
||||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count);
|
||||
StackPopResult reduction = parser__reduce(
|
||||
self, version, action.params.symbol, action.params.child_count,
|
||||
action.params.dynamic_precedence, action.params.alias_sequence_id,
|
||||
action.params.fragile
|
||||
is_fragile
|
||||
);
|
||||
StackSlice slice = *array_front(&reduction.slices);
|
||||
last_reduction_version = slice.version;
|
||||
|
|
|
|||
|
|
@ -270,7 +270,15 @@ describe("Parser", [&]() {
|
|||
"(parenthesized_expression "
|
||||
"(binary_expression (number) (member_expression (identifier) (property_identifier)))))))");
|
||||
|
||||
AssertThat(input->strings_read(), Equals(vector<string>({ " abc.d);" })));
|
||||
AssertThat(input->strings_read(), Equals(vector<string>({
|
||||
// The '*' is not reused because the preceding `x` expression is reused, which
|
||||
// puts the parser into a different state than when the `*` was initially tokenized.
|
||||
// When the `*` was initially tokenized, `x` was just an identifier. In both of these
|
||||
// states, external tokens are valid so we don't reuse tokens unless the lex states
|
||||
// match. This could probably be improved somehow.
|
||||
" * ",
|
||||
" abc.d);"
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -295,7 +303,10 @@ describe("Parser", [&]() {
|
|||
"(number) "
|
||||
"(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier))))))))");
|
||||
|
||||
AssertThat(input->strings_read(), Equals(vector<string>({"123 || 5 "})));
|
||||
AssertThat(input->strings_read(), Equals(vector<string>({
|
||||
"123 || 5 ",
|
||||
";"
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue