Fix conflation of finished items w/ different precedence

This commit is contained in:
Max Brunsfeld 2015-10-17 22:54:56 -07:00
parent 84fe01e145
commit 1983bcfb60
16 changed files with 19073 additions and 18250 deletions

View file

@ -47,9 +47,14 @@ class ParseTableBuilder {
conflict_manager(grammar) {}
pair<ParseTable, const GrammarError *> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone, -2),
});
add_parse_state(ParseItemSet({
{
ParseItem(rules::START(), 0, 0, -2),
ParseItem(rules::START(), start_production, 0),
LookaheadSet({ rules::END_OF_INPUT() }),
},
}));
@ -92,7 +97,7 @@ class ParseTableBuilder {
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : item_set.transitions(grammar)) {
for (const auto &transition : item_set.transitions()) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
@ -113,12 +118,11 @@ class ParseTableBuilder {
CompletionStatus get_completion_status(const ParseItem &item) {
CompletionStatus result = { false, 0, rules::AssociativityNone };
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size()) {
if (item.step_index == item.production->size()) {
result.is_done = true;
if (item.step_index > 0) {
const ProductionStep &last_step = production[item.step_index - 1];
const ProductionStep &last_step =
item.production->at(item.step_index - 1);
result.precedence = last_step.precedence;
result.associativity = last_step.associativity;
}
@ -139,7 +143,7 @@ class ParseTableBuilder {
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
completion_status.precedence,
completion_status.associativity,
item.production_index);
*item.production);
for (const auto &lookahead_sym : *lookahead_symbols.entries)
add_action(state_id, lookahead_sym, action, item_set);
@ -200,9 +204,9 @@ class ParseTableBuilder {
if (resolution.first)
return &parse_table.set_action(state_id, lookahead, new_action);
if (old_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(production_id(old_action));
parse_table.fragile_productions.insert(old_action.production);
if (new_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(production_id(new_action));
parse_table.fragile_productions.insert(new_action.production);
break;
}
@ -216,10 +220,6 @@ class ParseTableBuilder {
return nullptr;
}
pair<Symbol, int> production_id(const ParseAction &action) {
return { action.symbol, action.production_id };
}
bool handle_unresolved_conflict(const ParseItemSet &item_set,
const Symbol &lookahead) {
set<Symbol> involved_symbols;
@ -230,16 +230,14 @@ class ParseTableBuilder {
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_set = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size()) {
if (item.step_index == item.production->size()) {
if (lookahead_set.contains(lookahead)) {
involved_symbols.insert(item.lhs());
reduce_items.insert(item);
}
} else {
Symbol next_symbol = production[item.step_index].symbol;
Symbol next_symbol = item.production->at(item.step_index).symbol;
if (item.step_index > 0) {
set<Symbol> first_set = get_first_set(next_symbol);
@ -284,8 +282,7 @@ class ParseTableBuilder {
string item_string(const ParseItem &item) const {
string result = symbol_name(item.lhs()) + " ->";
size_t i = 0;
for (const ProductionStep &step :
grammar.productions(item.lhs())[item.production_index]) {
for (const ProductionStep &step : *item.production) {
if (i == item.step_index)
result += " \u2022";
result += " " + symbol_name(step.symbol);
@ -303,12 +300,10 @@ class ParseTableBuilder {
while (!symbols_to_process.empty()) {
Symbol symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (result.insert(symbol).second) {
for (const Production &production : grammar.productions(symbol)) {
if (result.insert(symbol).second)
for (const Production &production : grammar.productions(symbol))
if (!production.empty())
symbols_to_process.push_back({ production[0].symbol });
}
}
symbols_to_process.push_back(production[0].symbol);
}
return result;
@ -318,10 +313,8 @@ class ParseTableBuilder {
PrecedenceRange result;
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index > 0)
result.add(production[item.step_index - 1].precedence);
result.add(item.production->at(item.step_index - 1).precedence);
}
return result;
}

View file

@ -34,14 +34,12 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
continue;
// If the item is at the end of its production, skip to the next item.
const Production &item_production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == item_production.size())
if (item.step_index == item.production->size())
continue;
// If the next symbol in the production is not a non-terminal, skip to the
// next item.
Symbol next_symbol = item_production[item.step_index].symbol;
Symbol next_symbol = item.production->at(item.step_index).symbol;
if (next_symbol.is_token || next_symbol.is_built_in())
continue;
@ -52,33 +50,29 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
// recursively-added follow symbols.
LookaheadSet next_lookahead_symbols;
size_t next_step = item.step_index + 1;
if (next_step == item_production.size()) {
if (next_step == item.production->size()) {
next_lookahead_symbols = lookahead_symbols;
} else {
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
vector<Symbol> symbols_to_process(
{ item.production->at(next_step).symbol });
while (!symbols_to_process.empty()) {
Symbol following_symbol = symbols_to_process.back();
Symbol symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (!next_lookahead_symbols.insert(following_symbol))
if (!next_lookahead_symbols.insert(symbol))
continue;
for (const auto &production : grammar.productions(following_symbol))
for (const Production &production : grammar.productions(symbol))
if (!production.empty())
symbols_to_process.push_back(production[0].symbol);
}
}
// Add each of the next symbol's productions to be processed recursively.
size_t i = 0;
for (const Production &production : grammar.productions(next_symbol)) {
int rule_id = production.empty() ? 0 : production[0].rule_id;
for (const Production &production : grammar.productions(next_symbol))
items_to_process.push_back({
ParseItem(next_symbol, i, 0, rule_id),
next_lookahead_symbols,
ParseItem(next_symbol, production, 0), next_lookahead_symbols,
});
i++;
}
}
return result;

View file

@ -7,21 +7,22 @@ namespace tree_sitter {
namespace build_tables {
using std::map;
using std::pair;
using std::string;
using std::to_string;
using std::hash;
using rules::Symbol;
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
unsigned int step_index, int rule_id)
ParseItem::ParseItem(const Symbol &lhs, const Production &production,
unsigned int step_index)
: variable_index(lhs.index),
production_index(production_index),
step_index(step_index),
rule_id(rule_id) {}
production(&production),
step_index(step_index) {}
bool ParseItem::operator==(const ParseItem &other) const {
return (variable_index == other.variable_index) &&
(rule_id == other.rule_id) && (step_index == other.step_index);
return ((variable_index == other.variable_index) &&
(step_index == other.step_index) &&
(remaining_rule_id() == other.remaining_rule_id()));
}
bool ParseItem::operator<(const ParseItem &other) const {
@ -33,13 +34,31 @@ bool ParseItem::operator<(const ParseItem &other) const {
return true;
if (step_index > other.step_index)
return false;
return rule_id < other.rule_id;
return remaining_rule_id() < other.remaining_rule_id();
}
// Returns the symbol on the left-hand side of this item, constructed from
// the stored variable_index.
Symbol ParseItem::lhs() const {
  Symbol left_hand_side(variable_index);
  return left_hand_side;
}
// Builds the pair of integers that operator==, operator< and Hash use
// (together with variable_index and step_index) to tell items apart.
//
//  - Empty production: a fixed sentinel pair { -2, -1 }.
//  - Item not yet at the end of its production: { -1, rule_id of the step at
//    step_index }.
//  - Item at the end of its production: { associativity, precedence } of the
//    production's last step, so that finished items whose final steps carry
//    different precedence or associativity are kept distinct.
pair<int, int> ParseItem::remaining_rule_id() const {
  const Production &steps = *production;

  if (steps.empty())
    return { -2, -1 };

  // No steps remain: identify the item by how its final step binds.
  if (step_index >= steps.size()) {
    const auto &final_step = steps.back();
    return { final_step.associativity, final_step.precedence };
  }

  // Steps remain: identify the item by the rule id of the upcoming step.
  return { -1, steps.at(step_index).rule_id };
}
// Hashes a ParseItem by XOR-ing together the hashes of its variable index,
// step index, production length, and both components of remaining_rule_id().
// NOTE(review): operator== does not compare production length directly;
// this presumably relies on equal items always having equal-length
// productions -- confirm against how rule_id values are assigned.
size_t ParseItem::Hash::operator()(const ParseItem &item) const {
  const pair<int, int> rule_key = item.remaining_rule_id();
  return hash<int>()(item.variable_index) ^
         hash<unsigned int>()(item.step_index) ^
         hash<size_t>()(item.production->size()) ^
         hash<int>()(rule_key.first) ^
         hash<int>()(rule_key.second);
}
ParseItemSet::ParseItemSet() {}
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
@ -53,33 +72,27 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
size_t result = hash<size_t>()(item_set.entries.size());
for (auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
result ^= hash<unsigned int>()(item.variable_index) ^
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
result ^= ParseItem::Hash()(item);
const LookaheadSet &lookahead_set = pair.second;
result ^= hash<size_t>()(lookahead_set.entries->size());
for (auto &symbol : *pair.second.entries) {
for (auto &symbol : *pair.second.entries)
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
}
}
return result;
}
map<Symbol, ParseItemSet> ParseItemSet::transitions(
const SyntaxGrammar &grammar) const {
map<Symbol, ParseItemSet> ParseItemSet::transitions() const {
map<Symbol, ParseItemSet> result;
for (const auto &pair : entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_symbols = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size())
if (item.step_index == item.production->size())
continue;
size_t step = item.step_index + 1;
Symbol symbol = production[item.step_index].symbol;
int rule_id = step < production.size() ? production[step].rule_id : 0;
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
Symbol symbol = item.production->at(item.step_index).symbol;
ParseItem new_item(item.lhs(), *item.production, step);
result[symbol].entries[new_item] = lookahead_symbols;
}

View file

@ -2,6 +2,7 @@
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#include <map>
#include <utility>
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/syntax_grammar.h"
@ -11,16 +12,20 @@ namespace build_tables {
class ParseItem {
public:
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
ParseItem(const rules::Symbol &, const Production &, unsigned int);
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
rules::Symbol lhs() const;
std::pair<int, int> remaining_rule_id() const;
int variable_index;
unsigned int production_index;
const Production *production;
unsigned int step_index;
int rule_id;
struct Hash {
size_t operator()(const ParseItem &) const;
};
};
class ParseItemSet {
@ -28,7 +33,7 @@ class ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
std::map<rules::Symbol, ParseItemSet> transitions(const SyntaxGrammar &) const;
std::map<rules::Symbol, ParseItemSet> transitions() const;
bool operator==(const ParseItemSet &) const;
std::map<ParseItem, LookaheadSet> entries;