Record in parse table which actions can hide splits

Suppose a parse state S has multiple actions for a terminal lookahead symbol A.
Then during incremental parsing, while in state S, the parser should not
reuse a non-terminal lookahead B where FIRST(B) contains A, because reusing B
might prematurely discard one of the possible actions that a batch parser
would have attempted in state S, upon seeing A as a lookahead.
This commit is contained in:
Max Brunsfeld 2015-12-17 12:48:55 -08:00
parent 7fbb628c78
commit c495076adb
19 changed files with 58613 additions and 60661 deletions

View file

@ -79,7 +79,7 @@ class ParseTableBuilder {
add_reduce_extra_actions(state);
}
mark_fragile_reductions();
mark_fragile_actions();
remove_duplicate_states();
parse_table.symbols.insert({ rules::ERROR(), {} });
@ -134,9 +134,9 @@ class ParseTableBuilder {
}
// Adds a ShiftExtra action to parse state `state_id` for every symbol in
// grammar.ubiquitous_tokens, passing null_item_set as the item set.
// NOTE(review): this listing is a diff rendered without +/- markers; the
// two-line add_action(..., ParseAction::ShiftExtra(), null_item_set) call and
// the `action` local with its one-line add_action call look like the old and
// new forms of the same statement — confirm against the repository.
void add_shift_extra_actions(ParseStateId state_id) {
ParseAction action = ParseAction::ShiftExtra();
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens)
add_action(state_id, ubiquitous_symbol, ParseAction::ShiftExtra(),
null_item_set);
add_action(state_id, ubiquitous_symbol, action, null_item_set);
}
void add_reduce_extra_actions(ParseStateId state_id) {
@ -148,7 +148,7 @@ class ParseTableBuilder {
continue;
for (const ParseAction &action : actions_for_symbol->second)
if (action.type == ParseActionTypeShift) {
if (action.type == ParseActionTypeShift && !action.extra) {
size_t dest_state_id = action.state_index;
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
for (const auto &pair : state.actions)
@ -157,14 +157,36 @@ class ParseTableBuilder {
}
}
void mark_fragile_reductions() {
void mark_fragile_actions() {
for (ParseState &state : parse_table.states) {
set<Symbol> symbols_with_multiple_actions;
for (auto &entry : state.actions) {
if (entry.second.size() > 1)
symbols_with_multiple_actions.insert(entry.first);
for (ParseAction &action : entry.second) {
if (action.type == ParseActionTypeReduce) {
if (action.type == ParseActionTypeReduce && !action.extra) {
if (has_fragile_production(action.production))
action.type = ParseActionTypeReduceFragile;
action.fragile = true;
action.production = NULL;
action.precedence_range = PrecedenceRange();
action.associativity = rules::AssociativityNone;
}
}
}
if (!symbols_with_multiple_actions.empty()) {
for (auto &entry : state.actions) {
if (!entry.first.is_token) {
set<Symbol> first_set = get_first_set(entry.first);
for (const Symbol &symbol : symbols_with_multiple_actions) {
if (first_set.count(symbol)) {
entry.second[0].can_hide_split = true;
break;
}
}
}
}
}
@ -175,6 +197,7 @@ class ParseTableBuilder {
bool done = false;
while (!done) {
done = true;
map<ParseStateId, ParseStateId> replacements;
for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
for (size_t j = 0; j < i; j++) {
@ -210,9 +233,8 @@ class ParseTableBuilder {
}
}
for (auto replacement = replacements.rbegin(); replacement != replacements.rend(); ++replacement) {
parse_table.states.erase(parse_table.states.begin() + replacement->first);
}
for (auto i = replacements.rbegin(); i != replacements.rend(); ++i)
parse_table.states.erase(parse_table.states.begin() + i->first);
}
}

View file

@ -22,12 +22,12 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
switch (old_action.type) {
case ParseActionTypeError:
case ParseActionTypeShiftExtra:
case ParseActionTypeReduceExtra:
return { true, ConflictTypeNone };
case ParseActionTypeShift:
if (new_action.type == ParseActionTypeReduce) {
if (new_action.extra)
return { false, ConflictTypeNone };
int min_precedence = old_action.precedence_range.min;
int max_precedence = old_action.precedence_range.max;
int new_precedence = new_action.precedence_range.max;
@ -54,6 +54,12 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
}
case ParseActionTypeReduce:
if (new_action.extra)
return { false, ConflictTypeNone };
if (old_action.extra)
return { true, ConflictTypeNone };
if (new_action.extra)
return { false, ConflictTypeNone };
if (new_action.type == ParseActionTypeReduce) {
int old_precedence = old_action.precedence_range.min;
int new_precedence = new_action.precedence_range.min;

View file

@ -21,8 +21,7 @@ ParseItem::ParseItem(const Symbol &lhs, const Production &production,
// Two parse items compare equal when variable_index, step_index and
// production all compare equal.
// NOTE(review): this listing is a diff rendered without +/- markers; the
// tail of the return expression appears twice (multi-line and single-line
// formatting of the same comparison) — confirm against the repository.
bool ParseItem::operator==(const ParseItem &other) const {
return ((variable_index == other.variable_index) &&
(step_index == other.step_index) &&
(production == other.production));
(step_index == other.step_index) && (production == other.production));
}
bool ParseItem::operator<(const ParseItem &other) const {
@ -75,7 +74,7 @@ rules::Associativity ParseItem::associativity() const {
// Hashes a parse item by XOR-combining the hashes of its variable_index,
// its step_index and the address stored in `production`.
// NOTE(review): this listing is a diff rendered without +/- markers; the two
// lines hashing `production` (C-style cast vs. static_cast to const void *)
// look like the old and new forms of one statement — confirm against the
// repository.
size_t ParseItem::Hash::operator()(const ParseItem &item) const {
size_t result = hash<int>()(item.variable_index);
result ^= hash<unsigned int>()(item.step_index);
result ^= hash<void *>()((void *)item.production);
result ^= hash<const void *>()(static_cast<const void *>(item.production));
return result;
}

View file

@ -329,21 +329,23 @@ class CCodeGenerator {
add("ACCEPT_INPUT()");
break;
case ParseActionTypeShift:
add("SHIFT(" + to_string(action.state_index) + ")");
break;
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduceFragile:
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
if (action.extra) {
add("SHIFT_EXTRA()");
} else {
add("SHIFT(" + to_string(action.state_index) + ", ");
add_action_flags(action);
add(")");
}
break;
case ParseActionTypeReduce:
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
if (action.extra) {
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
} else {
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ", ");
add_action_flags(action);
add(")");
}
break;
default: {}
}
@ -351,6 +353,17 @@ class CCodeGenerator {
}
}
// Passes the flag expression for `action` to add(): "FRAGILE" and/or
// "CAN_HIDE_SPLIT" joined by '|', or "0" when neither flag is set.
void add_action_flags(const ParseAction &action) {
  const char *flag_text = "0";
  if (action.fragile) {
    // Fragile actions may additionally carry the can-hide-split marker.
    flag_text = action.can_hide_split ? "FRAGILE|CAN_HIDE_SPLIT" : "FRAGILE";
  } else if (action.can_hide_split) {
    flag_text = "CAN_HIDE_SPLIT";
  }
  add(flag_text);
}
// Helper functions
string lex_state_index(size_t i) {

View file

@ -17,6 +17,9 @@ ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
rules::Associativity associativity,
const Production *production)
: type(type),
extra(false),
fragile(false),
can_hide_split(false),
symbol(symbol),
state_index(state_index),
consumed_symbol_count(consumed_symbol_count),
@ -26,6 +29,9 @@ ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
ParseAction::ParseAction()
: type(ParseActionTypeError),
extra(false),
fragile(false),
can_hide_split(false),
symbol(Symbol(-1)),
state_index(-1),
consumed_symbol_count(0),
@ -49,14 +55,17 @@ ParseAction ParseAction::Shift(ParseStateId state_index,
// Factory for the action that shifts an "extra" token: starts from a
// default-constructed ParseAction and returns it with the shift type set.
// NOTE(review): this listing is a diff rendered without +/- markers; the two
// assignments to `type` look like the old form (ParseActionTypeShiftExtra)
// followed by the new form (ParseActionTypeShift plus extra = true) —
// confirm against the repository.
ParseAction ParseAction::ShiftExtra() {
ParseAction action;
action.type = ParseActionTypeShiftExtra;
action.type = ParseActionTypeShift;
action.extra = true;
return action;
}
// Factory for the action that reduces an "extra" token: starts from a
// default-constructed ParseAction, records `symbol` and sets
// consumed_symbol_count to 1.
// NOTE(review): this listing is a diff rendered without +/- markers; the two
// assignments to `type` look like the old form (ParseActionTypeReduceExtra)
// followed by the new form (ParseActionTypeReduce plus extra = true) —
// confirm against the repository.
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
ParseAction action;
action.type = ParseActionTypeReduceExtra;
action.type = ParseActionTypeReduce;
action.extra = true;
action.symbol = symbol;
action.consumed_symbol_count = 1;
return action;
}
@ -69,13 +78,11 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
}
// Field-wise equality for parse actions. Both return statements compare
// type, symbol, state_index, production and consumed_symbol_count; the
// second also compares the extra, fragile and can_hide_split flags.
// precedence_range and associativity are not part of either comparison.
// NOTE(review): this listing is a diff rendered without +/- markers; the two
// return statements look like the old and new forms of the comparison —
// confirm against the repository.
bool ParseAction::operator==(const ParseAction &other) const {
return (
type == other.type &&
symbol == other.symbol &&
state_index == other.state_index &&
production == other.production &&
consumed_symbol_count == other.consumed_symbol_count
);
return (type == other.type && extra == other.extra &&
fragile == other.fragile && can_hide_split == other.can_hide_split &&
symbol == other.symbol && state_index == other.state_index &&
production == other.production &&
consumed_symbol_count == other.consumed_symbol_count);
}
bool ParseAction::operator<(const ParseAction &other) const {
@ -83,6 +90,18 @@ bool ParseAction::operator<(const ParseAction &other) const {
return true;
if (other.type < type)
return false;
if (extra && !other.extra)
return true;
if (other.extra && !extra)
return false;
if (fragile && !other.fragile)
return true;
if (other.fragile && !fragile)
return false;
if (can_hide_split && !other.can_hide_split)
return true;
if (other.can_hide_split && !can_hide_split)
return false;
if (symbol < other.symbol)
return true;
if (other.symbol < symbol)
@ -121,16 +140,20 @@ ParseStateId ParseTable::add_state() {
// Replaces whatever actions state `id` holds for `symbol` with the single
// given action and returns a reference to the stored copy. Also updates the
// `structural` bookkeeping in symbols[symbol].
// NOTE(review): this listing is a diff rendered without +/- markers; the
// `bool structural` / `+= structural` pair and the `if (action.extra)` branch
// look like the old and new forms of the same bookkeeping — confirm against
// the repository.
ParseAction &ParseTable::set_action(ParseStateId id, Symbol symbol,
ParseAction action) {
bool structural = action.type != ParseActionTypeShiftExtra;
symbols[symbol].structural += structural;
if (action.extra)
// Bare subscript with no assignment; presumably only ensures an entry for
// `symbol` exists without marking it structural — TODO confirm container type.
symbols[symbol];
else
symbols[symbol].structural = true;
states[id].actions[symbol] = vector<ParseAction>({ action });
return *states[id].actions[symbol].begin();
}
// Appends the given action to the actions state `id` holds for `symbol`
// (existing actions are kept) and returns a reference to the newly stored
// last element. Also updates the `structural` bookkeeping in symbols[symbol].
// NOTE(review): this listing is a diff rendered without +/- markers; the
// `bool structural` / `+= structural` pair and the `if (action.extra)` branch
// look like the old and new forms of the same bookkeeping — confirm against
// the repository.
ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
ParseAction action) {
bool structural = action.type != ParseActionTypeShiftExtra;
symbols[symbol].structural += structural;
if (action.extra)
// Bare subscript with no assignment; presumably only ensures an entry for
// `symbol` exists without marking it structural — TODO confirm container type.
symbols[symbol];
else
symbols[symbol].structural = true;
states[id].actions[symbol].push_back(action);
return *states[id].actions[symbol].rbegin();
}

View file

@ -17,10 +17,6 @@ typedef uint64_t ParseStateId;
typedef enum {
ParseActionTypeError,
ParseActionTypeReduceExtra,
ParseActionTypeReduceFragile,
ParseActionTypeShiftExtra,
ParseActionTypeShift,
ParseActionTypeReduce,
ParseActionTypeAccept,
@ -45,6 +41,9 @@ class ParseAction {
bool operator<(const ParseAction &) const;
ParseActionType type;
bool extra;
bool fragile;
bool can_hide_split;
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
@ -64,6 +63,8 @@ struct hash<tree_sitter::ParseAction> {
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
hash<size_t>()(action.state_index) ^
hash<size_t>()(action.consumed_symbol_count) ^
hash<bool>()(action.extra) ^ hash<bool>()(action.fragile) ^
hash<bool>()(action.can_hide_split) ^
hash<int>()(action.associativity) ^
hash<int>()(action.precedence_range.min) ^
hash<int>()(action.precedence_range.max) ^

View file

@ -81,7 +81,8 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
vector<Production> productions;
for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
Production production = FlattenRule().flatten(rule_component);
if (std::find(productions.begin(), productions.end(), production) == productions.end())
if (std::find(productions.begin(), productions.end(), production) ==
productions.end())
productions.push_back(production);
}
result.variables.push_back(

View file

@ -21,9 +21,7 @@ SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
// Constructs a production step by copying the given symbol, precedence and
// associativity into the members of the same names; the body is empty.
// NOTE(review): this listing is a diff rendered without +/- markers; the
// member-initializer list appears twice (three-line and one-line formatting
// of the same initializers) — confirm against the repository.
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
rules::Associativity associativity)
: symbol(symbol),
precedence(precedence),
associativity(associativity) {}
: symbol(symbol), precedence(precedence), associativity(associativity) {}
bool ProductionStep::operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&