Record in parse table which actions can hide splits
Suppose a parse state S has multiple actions for a terminal lookahead symbol A. Then during incremental parsing, while in state S, the parser should not reuse a non-terminal lookahead B where FIRST(B) contains A, because reusing B might prematurely discard one of the possible actions that a batch parser would have attempted in state S, upon seeing A as a lookahead.
This commit is contained in:
parent
7fbb628c78
commit
c495076adb
19 changed files with 58613 additions and 60661 deletions
|
|
@ -79,7 +79,7 @@ class ParseTableBuilder {
|
|||
add_reduce_extra_actions(state);
|
||||
}
|
||||
|
||||
mark_fragile_reductions();
|
||||
mark_fragile_actions();
|
||||
remove_duplicate_states();
|
||||
|
||||
parse_table.symbols.insert({ rules::ERROR(), {} });
|
||||
|
|
@ -134,9 +134,9 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void add_shift_extra_actions(ParseStateId state_id) {
|
||||
ParseAction action = ParseAction::ShiftExtra();
|
||||
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens)
|
||||
add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra(),
|
||||
null_item_set);
|
||||
add_action(state_id, ubiquitous_symbol, action, null_item_set);
|
||||
}
|
||||
|
||||
void add_reduce_extra_actions(ParseStateId state_id) {
|
||||
|
|
@ -148,7 +148,7 @@ class ParseTableBuilder {
|
|||
continue;
|
||||
|
||||
for (const ParseAction &action : actions_for_symbol->second)
|
||||
if (action.type == ParseActionTypeShift) {
|
||||
if (action.type == ParseActionTypeShift && !action.extra) {
|
||||
size_t dest_state_id = action.state_index;
|
||||
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
|
||||
for (const auto &pair : state.actions)
|
||||
|
|
@ -157,14 +157,36 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void mark_fragile_reductions() {
|
||||
void mark_fragile_actions() {
|
||||
for (ParseState &state : parse_table.states) {
|
||||
set<Symbol> symbols_with_multiple_actions;
|
||||
|
||||
for (auto &entry : state.actions) {
|
||||
if (entry.second.size() > 1)
|
||||
symbols_with_multiple_actions.insert(entry.first);
|
||||
|
||||
for (ParseAction &action : entry.second) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
if (action.type == ParseActionTypeReduce && !action.extra) {
|
||||
if (has_fragile_production(action.production))
|
||||
action.type = ParseActionTypeReduceFragile;
|
||||
action.fragile = true;
|
||||
|
||||
action.production = NULL;
|
||||
action.precedence_range = PrecedenceRange();
|
||||
action.associativity = rules::AssociativityNone;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!symbols_with_multiple_actions.empty()) {
|
||||
for (auto &entry : state.actions) {
|
||||
if (!entry.first.is_token) {
|
||||
set<Symbol> first_set = get_first_set(entry.first);
|
||||
for (const Symbol &symbol : symbols_with_multiple_actions) {
|
||||
if (first_set.count(symbol)) {
|
||||
entry.second[0].can_hide_split = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -175,6 +197,7 @@ class ParseTableBuilder {
|
|||
bool done = false;
|
||||
while (!done) {
|
||||
done = true;
|
||||
|
||||
map<ParseStateId, ParseStateId> replacements;
|
||||
for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
|
|
@ -210,9 +233,8 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
for (auto replacement = replacements.rbegin(); replacement != replacements.rend(); ++replacement) {
|
||||
parse_table.states.erase(parse_table.states.begin() + replacement->first);
|
||||
}
|
||||
for (auto i = replacements.rbegin(); i != replacements.rend(); ++i)
|
||||
parse_table.states.erase(parse_table.states.begin() + i->first);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,12 +22,12 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
|
|||
|
||||
switch (old_action.type) {
|
||||
case ParseActionTypeError:
|
||||
case ParseActionTypeShiftExtra:
|
||||
case ParseActionTypeReduceExtra:
|
||||
return { true, ConflictTypeNone };
|
||||
|
||||
case ParseActionTypeShift:
|
||||
if (new_action.type == ParseActionTypeReduce) {
|
||||
if (new_action.extra)
|
||||
return { false, ConflictTypeNone };
|
||||
int min_precedence = old_action.precedence_range.min;
|
||||
int max_precedence = old_action.precedence_range.max;
|
||||
int new_precedence = new_action.precedence_range.max;
|
||||
|
|
@ -54,6 +54,12 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
|
|||
}
|
||||
|
||||
case ParseActionTypeReduce:
|
||||
if (new_action.extra)
|
||||
return { false, ConflictTypeNone };
|
||||
if (old_action.extra)
|
||||
return { true, ConflictTypeNone };
|
||||
if (new_action.extra)
|
||||
return { false, ConflictTypeNone };
|
||||
if (new_action.type == ParseActionTypeReduce) {
|
||||
int old_precedence = old_action.precedence_range.min;
|
||||
int new_precedence = new_action.precedence_range.min;
|
||||
|
|
|
|||
|
|
@ -21,8 +21,7 @@ ParseItem::ParseItem(const Symbol &lhs, const Production &production,
|
|||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
return ((variable_index == other.variable_index) &&
|
||||
(step_index == other.step_index) &&
|
||||
(production == other.production));
|
||||
(step_index == other.step_index) && (production == other.production));
|
||||
}
|
||||
|
||||
bool ParseItem::operator<(const ParseItem &other) const {
|
||||
|
|
@ -75,7 +74,7 @@ rules::Associativity ParseItem::associativity() const {
|
|||
size_t ParseItem::Hash::operator()(const ParseItem &item) const {
|
||||
size_t result = hash<int>()(item.variable_index);
|
||||
result ^= hash<unsigned int>()(item.step_index);
|
||||
result ^= hash<void *>()((void *)item.production);
|
||||
result ^= hash<const void *>()(static_cast<const void *>(item.production));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -329,21 +329,23 @@ class CCodeGenerator {
|
|||
add("ACCEPT_INPUT()");
|
||||
break;
|
||||
case ParseActionTypeShift:
|
||||
add("SHIFT(" + to_string(action.state_index) + ")");
|
||||
break;
|
||||
case ParseActionTypeShiftExtra:
|
||||
add("SHIFT_EXTRA()");
|
||||
break;
|
||||
case ParseActionTypeReduceFragile:
|
||||
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
if (action.extra) {
|
||||
add("SHIFT_EXTRA()");
|
||||
} else {
|
||||
add("SHIFT(" + to_string(action.state_index) + ", ");
|
||||
add_action_flags(action);
|
||||
add(")");
|
||||
}
|
||||
break;
|
||||
case ParseActionTypeReduce:
|
||||
add("REDUCE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
break;
|
||||
case ParseActionTypeReduceExtra:
|
||||
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
|
||||
if (action.extra) {
|
||||
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
|
||||
} else {
|
||||
add("REDUCE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ", ");
|
||||
add_action_flags(action);
|
||||
add(")");
|
||||
}
|
||||
break;
|
||||
default: {}
|
||||
}
|
||||
|
|
@ -351,6 +353,17 @@ class CCodeGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
void add_action_flags(const ParseAction &action) {
|
||||
if (action.fragile && action.can_hide_split)
|
||||
add("FRAGILE|CAN_HIDE_SPLIT");
|
||||
else if (action.fragile)
|
||||
add("FRAGILE");
|
||||
else if (action.can_hide_split)
|
||||
add("CAN_HIDE_SPLIT");
|
||||
else
|
||||
add("0");
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
string lex_state_index(size_t i) {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@ ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
|||
rules::Associativity associativity,
|
||||
const Production *production)
|
||||
: type(type),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
can_hide_split(false),
|
||||
symbol(symbol),
|
||||
state_index(state_index),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
|
|
@ -26,6 +29,9 @@ ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
|||
|
||||
ParseAction::ParseAction()
|
||||
: type(ParseActionTypeError),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
can_hide_split(false),
|
||||
symbol(Symbol(-1)),
|
||||
state_index(-1),
|
||||
consumed_symbol_count(0),
|
||||
|
|
@ -49,14 +55,17 @@ ParseAction ParseAction::Shift(ParseStateId state_index,
|
|||
|
||||
ParseAction ParseAction::ShiftExtra() {
|
||||
ParseAction action;
|
||||
action.type = ParseActionTypeShiftExtra;
|
||||
action.type = ParseActionTypeShift;
|
||||
action.extra = true;
|
||||
return action;
|
||||
}
|
||||
|
||||
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
|
||||
ParseAction action;
|
||||
action.type = ParseActionTypeReduceExtra;
|
||||
action.type = ParseActionTypeReduce;
|
||||
action.extra = true;
|
||||
action.symbol = symbol;
|
||||
action.consumed_symbol_count = 1;
|
||||
return action;
|
||||
}
|
||||
|
||||
|
|
@ -69,13 +78,11 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
|
|||
}
|
||||
|
||||
bool ParseAction::operator==(const ParseAction &other) const {
|
||||
return (
|
||||
type == other.type &&
|
||||
symbol == other.symbol &&
|
||||
state_index == other.state_index &&
|
||||
production == other.production &&
|
||||
consumed_symbol_count == other.consumed_symbol_count
|
||||
);
|
||||
return (type == other.type && extra == other.extra &&
|
||||
fragile == other.fragile && can_hide_split == other.can_hide_split &&
|
||||
symbol == other.symbol && state_index == other.state_index &&
|
||||
production == other.production &&
|
||||
consumed_symbol_count == other.consumed_symbol_count);
|
||||
}
|
||||
|
||||
bool ParseAction::operator<(const ParseAction &other) const {
|
||||
|
|
@ -83,6 +90,18 @@ bool ParseAction::operator<(const ParseAction &other) const {
|
|||
return true;
|
||||
if (other.type < type)
|
||||
return false;
|
||||
if (extra && !other.extra)
|
||||
return true;
|
||||
if (other.extra && !extra)
|
||||
return false;
|
||||
if (fragile && !other.fragile)
|
||||
return true;
|
||||
if (other.fragile && !fragile)
|
||||
return false;
|
||||
if (can_hide_split && !other.can_hide_split)
|
||||
return true;
|
||||
if (other.can_hide_split && !can_hide_split)
|
||||
return false;
|
||||
if (symbol < other.symbol)
|
||||
return true;
|
||||
if (other.symbol < symbol)
|
||||
|
|
@ -121,16 +140,20 @@ ParseStateId ParseTable::add_state() {
|
|||
|
||||
ParseAction &ParseTable::set_action(ParseStateId id, Symbol symbol,
|
||||
ParseAction action) {
|
||||
bool structural = action.type != ParseActionTypeShiftExtra;
|
||||
symbols[symbol].structural += structural;
|
||||
if (action.extra)
|
||||
symbols[symbol];
|
||||
else
|
||||
symbols[symbol].structural = true;
|
||||
states[id].actions[symbol] = vector<ParseAction>({ action });
|
||||
return *states[id].actions[symbol].begin();
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
|
||||
ParseAction action) {
|
||||
bool structural = action.type != ParseActionTypeShiftExtra;
|
||||
symbols[symbol].structural += structural;
|
||||
if (action.extra)
|
||||
symbols[symbol];
|
||||
else
|
||||
symbols[symbol].structural = true;
|
||||
states[id].actions[symbol].push_back(action);
|
||||
return *states[id].actions[symbol].rbegin();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,10 +17,6 @@ typedef uint64_t ParseStateId;
|
|||
|
||||
typedef enum {
|
||||
ParseActionTypeError,
|
||||
ParseActionTypeReduceExtra,
|
||||
ParseActionTypeReduceFragile,
|
||||
ParseActionTypeShiftExtra,
|
||||
|
||||
ParseActionTypeShift,
|
||||
ParseActionTypeReduce,
|
||||
ParseActionTypeAccept,
|
||||
|
|
@ -45,6 +41,9 @@ class ParseAction {
|
|||
bool operator<(const ParseAction &) const;
|
||||
|
||||
ParseActionType type;
|
||||
bool extra;
|
||||
bool fragile;
|
||||
bool can_hide_split;
|
||||
rules::Symbol symbol;
|
||||
ParseStateId state_index;
|
||||
size_t consumed_symbol_count;
|
||||
|
|
@ -64,6 +63,8 @@ struct hash<tree_sitter::ParseAction> {
|
|||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.consumed_symbol_count) ^
|
||||
hash<bool>()(action.extra) ^ hash<bool>()(action.fragile) ^
|
||||
hash<bool>()(action.can_hide_split) ^
|
||||
hash<int>()(action.associativity) ^
|
||||
hash<int>()(action.precedence_range.min) ^
|
||||
hash<int>()(action.precedence_range.max) ^
|
||||
|
|
|
|||
|
|
@ -81,7 +81,8 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
|||
vector<Production> productions;
|
||||
for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
|
||||
Production production = FlattenRule().flatten(rule_component);
|
||||
if (std::find(productions.begin(), productions.end(), production) == productions.end())
|
||||
if (std::find(productions.begin(), productions.end(), production) ==
|
||||
productions.end())
|
||||
productions.push_back(production);
|
||||
}
|
||||
result.variables.push_back(
|
||||
|
|
|
|||
|
|
@ -21,9 +21,7 @@ SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
|
|||
|
||||
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
|
||||
rules::Associativity associativity)
|
||||
: symbol(symbol),
|
||||
precedence(precedence),
|
||||
associativity(associativity) {}
|
||||
: symbol(symbol), precedence(precedence), associativity(associativity) {}
|
||||
|
||||
bool ProductionStep::operator==(const ProductionStep &other) const {
|
||||
return symbol == other.symbol && precedence == other.precedence &&
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue