Fix conflation of finished items w/ different precedence

This commit is contained in:
Max Brunsfeld 2015-10-17 22:54:56 -07:00
parent 84fe01e145
commit 1983bcfb60
16 changed files with 19073 additions and 18250 deletions

View file

@ -47,9 +47,14 @@ class ParseTableBuilder {
conflict_manager(grammar) {}
pair<ParseTable, const GrammarError *> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone, -2),
});
add_parse_state(ParseItemSet({
{
ParseItem(rules::START(), 0, 0, -2),
ParseItem(rules::START(), start_production, 0),
LookaheadSet({ rules::END_OF_INPUT() }),
},
}));
@ -92,7 +97,7 @@ class ParseTableBuilder {
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : item_set.transitions(grammar)) {
for (const auto &transition : item_set.transitions()) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
@ -113,12 +118,11 @@ class ParseTableBuilder {
CompletionStatus get_completion_status(const ParseItem &item) {
CompletionStatus result = { false, 0, rules::AssociativityNone };
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size()) {
if (item.step_index == item.production->size()) {
result.is_done = true;
if (item.step_index > 0) {
const ProductionStep &last_step = production[item.step_index - 1];
const ProductionStep &last_step =
item.production->at(item.step_index - 1);
result.precedence = last_step.precedence;
result.associativity = last_step.associativity;
}
@ -139,7 +143,7 @@ class ParseTableBuilder {
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
completion_status.precedence,
completion_status.associativity,
item.production_index);
*item.production);
for (const auto &lookahead_sym : *lookahead_symbols.entries)
add_action(state_id, lookahead_sym, action, item_set);
@ -200,9 +204,9 @@ class ParseTableBuilder {
if (resolution.first)
return &parse_table.set_action(state_id, lookahead, new_action);
if (old_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(production_id(old_action));
parse_table.fragile_productions.insert(old_action.production);
if (new_action.type == ParseActionTypeReduce)
parse_table.fragile_production_ids.insert(production_id(new_action));
parse_table.fragile_productions.insert(new_action.production);
break;
}
@ -216,10 +220,6 @@ class ParseTableBuilder {
return nullptr;
}
pair<Symbol, int> production_id(const ParseAction &action) {
return { action.symbol, action.production_id };
}
bool handle_unresolved_conflict(const ParseItemSet &item_set,
const Symbol &lookahead) {
set<Symbol> involved_symbols;
@ -230,16 +230,14 @@ class ParseTableBuilder {
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_set = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size()) {
if (item.step_index == item.production->size()) {
if (lookahead_set.contains(lookahead)) {
involved_symbols.insert(item.lhs());
reduce_items.insert(item);
}
} else {
Symbol next_symbol = production[item.step_index].symbol;
Symbol next_symbol = item.production->at(item.step_index).symbol;
if (item.step_index > 0) {
set<Symbol> first_set = get_first_set(next_symbol);
@ -284,8 +282,7 @@ class ParseTableBuilder {
string item_string(const ParseItem &item) const {
string result = symbol_name(item.lhs()) + " ->";
size_t i = 0;
for (const ProductionStep &step :
grammar.productions(item.lhs())[item.production_index]) {
for (const ProductionStep &step : *item.production) {
if (i == item.step_index)
result += " \u2022";
result += " " + symbol_name(step.symbol);
@ -303,12 +300,10 @@ class ParseTableBuilder {
while (!symbols_to_process.empty()) {
Symbol symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (result.insert(symbol).second) {
for (const Production &production : grammar.productions(symbol)) {
if (result.insert(symbol).second)
for (const Production &production : grammar.productions(symbol))
if (!production.empty())
symbols_to_process.push_back({ production[0].symbol });
}
}
symbols_to_process.push_back(production[0].symbol);
}
return result;
@ -318,10 +313,8 @@ class ParseTableBuilder {
PrecedenceRange result;
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index > 0)
result.add(production[item.step_index - 1].precedence);
result.add(item.production->at(item.step_index - 1).precedence);
}
return result;
}

View file

@ -34,14 +34,12 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
continue;
// If the item is at the end of its production, skip to the next item.
const Production &item_production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == item_production.size())
if (item.step_index == item.production->size())
continue;
// If the next symbol in the production is not a non-terminal, skip to the
// next item.
Symbol next_symbol = item_production[item.step_index].symbol;
Symbol next_symbol = item.production->at(item.step_index).symbol;
if (next_symbol.is_token || next_symbol.is_built_in())
continue;
@ -52,33 +50,29 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
// recursively-added follow symbols.
LookaheadSet next_lookahead_symbols;
size_t next_step = item.step_index + 1;
if (next_step == item_production.size()) {
if (next_step == item.production->size()) {
next_lookahead_symbols = lookahead_symbols;
} else {
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
vector<Symbol> symbols_to_process(
{ item.production->at(next_step).symbol });
while (!symbols_to_process.empty()) {
Symbol following_symbol = symbols_to_process.back();
Symbol symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (!next_lookahead_symbols.insert(following_symbol))
if (!next_lookahead_symbols.insert(symbol))
continue;
for (const auto &production : grammar.productions(following_symbol))
for (const Production &production : grammar.productions(symbol))
if (!production.empty())
symbols_to_process.push_back(production[0].symbol);
}
}
// Add each of the next symbol's productions to be processed recursively.
size_t i = 0;
for (const Production &production : grammar.productions(next_symbol)) {
int rule_id = production.empty() ? 0 : production[0].rule_id;
for (const Production &production : grammar.productions(next_symbol))
items_to_process.push_back({
ParseItem(next_symbol, i, 0, rule_id),
next_lookahead_symbols,
ParseItem(next_symbol, production, 0), next_lookahead_symbols,
});
i++;
}
}
return result;

View file

@ -7,21 +7,22 @@ namespace tree_sitter {
namespace build_tables {
using std::map;
using std::pair;
using std::string;
using std::to_string;
using std::hash;
using rules::Symbol;
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
unsigned int step_index, int rule_id)
ParseItem::ParseItem(const Symbol &lhs, const Production &production,
unsigned int step_index)
: variable_index(lhs.index),
production_index(production_index),
step_index(step_index),
rule_id(rule_id) {}
production(&production),
step_index(step_index) {}
bool ParseItem::operator==(const ParseItem &other) const {
return (variable_index == other.variable_index) &&
(rule_id == other.rule_id) && (step_index == other.step_index);
return ((variable_index == other.variable_index) &&
(step_index == other.step_index) &&
(remaining_rule_id() == other.remaining_rule_id()));
}
bool ParseItem::operator<(const ParseItem &other) const {
@ -33,13 +34,31 @@ bool ParseItem::operator<(const ParseItem &other) const {
return true;
if (step_index > other.step_index)
return false;
return rule_id < other.rule_id;
return remaining_rule_id() < other.remaining_rule_id();
}
// Returns the symbol on the left-hand side of this item, constructed from the
// stored variable index.
Symbol ParseItem::lhs() const {
  Symbol left_hand_side(variable_index);
  return left_hand_side;
}
// Produces the pair of integers that, together with the variable index and
// step index, is used to compare and hash parse items.
//
//   - empty production:     { -2, -1 }
//   - item not yet done:    { -1, rule_id of the step at step_index }
//   - item at the end:      { associativity, precedence } of the final step
//
// Using the final step's associativity and precedence for finished items keeps
// finished items with different precedence from comparing equal.
pair<int, int> ParseItem::remaining_rule_id() const {
  const Production &steps = *production;

  if (steps.empty())
    return { -2, -1 };

  // The item has consumed every step: identify it by its last step's metadata.
  if (step_index >= steps.size()) {
    const auto &final_step = steps.back();
    return { final_step.associativity, final_step.precedence };
  }

  // Steps remain: identify the item by the rule that is still to be matched.
  return { -1, steps.at(step_index).rule_id };
}
// Hashes a parse item by XOR-ing together the hashes of its variable index,
// its step index, the length of its production, and both components of its
// remaining_rule_id(). XOR is order-independent, so the terms may be combined
// in a single expression.
size_t ParseItem::Hash::operator()(const ParseItem &item) const {
  const pair<int, int> remaining = item.remaining_rule_id();
  return hash<int>()(item.variable_index) ^
         hash<unsigned int>()(item.step_index) ^
         hash<size_t>()(item.production->size()) ^
         hash<int>()(remaining.first) ^
         hash<int>()(remaining.second);
}
ParseItemSet::ParseItemSet() {}
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
@ -53,33 +72,27 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
size_t result = hash<size_t>()(item_set.entries.size());
for (auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
result ^= hash<unsigned int>()(item.variable_index) ^
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
result ^= ParseItem::Hash()(item);
const LookaheadSet &lookahead_set = pair.second;
result ^= hash<size_t>()(lookahead_set.entries->size());
for (auto &symbol : *pair.second.entries) {
for (auto &symbol : *pair.second.entries)
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
}
}
return result;
}
map<Symbol, ParseItemSet> ParseItemSet::transitions(
const SyntaxGrammar &grammar) const {
map<Symbol, ParseItemSet> ParseItemSet::transitions() const {
map<Symbol, ParseItemSet> result;
for (const auto &pair : entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_symbols = pair.second;
const Production &production =
grammar.productions(item.lhs())[item.production_index];
if (item.step_index == production.size())
if (item.step_index == item.production->size())
continue;
size_t step = item.step_index + 1;
Symbol symbol = production[item.step_index].symbol;
int rule_id = step < production.size() ? production[step].rule_id : 0;
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
Symbol symbol = item.production->at(item.step_index).symbol;
ParseItem new_item(item.lhs(), *item.production, step);
result[symbol].entries[new_item] = lookahead_symbols;
}

View file

@ -2,6 +2,7 @@
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
#include <map>
#include <utility>
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/syntax_grammar.h"
@ -11,16 +12,20 @@ namespace build_tables {
class ParseItem {
public:
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
ParseItem(const rules::Symbol &, const Production &, unsigned int);
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
rules::Symbol lhs() const;
std::pair<int, int> remaining_rule_id() const;
int variable_index;
unsigned int production_index;
const Production *production;
unsigned int step_index;
int rule_id;
struct Hash {
size_t operator()(const ParseItem &) const;
};
};
class ParseItemSet {
@ -28,7 +33,7 @@ class ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
std::map<rules::Symbol, ParseItemSet> transitions(const SyntaxGrammar &) const;
std::map<rules::Symbol, ParseItemSet> transitions() const;
bool operator==(const ParseItemSet &) const;
std::map<ParseItem, LookaheadSet> entries;

View file

@ -380,9 +380,8 @@ class CCodeGenerator {
}
bool reduce_action_is_fragile(const ParseAction &action) const {
return parse_table.fragile_production_ids.find(
{ action.symbol, action.production_id }) !=
parse_table.fragile_production_ids.end();
return parse_table.fragile_productions.find(action.production) !=
parse_table.fragile_productions.end();
}
// C-code generation functions

View file

@ -14,14 +14,15 @@ using rules::Symbol;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
PrecedenceRange precedence_range,
rules::Associativity associativity, int production_id)
rules::Associativity associativity,
const Production *production)
: type(type),
symbol(symbol),
state_index(state_index),
consumed_symbol_count(consumed_symbol_count),
precedence_range(precedence_range),
associativity(associativity),
production_id(production_id) {}
production(production) {}
ParseAction::ParseAction()
: type(ParseActionTypeError),
@ -43,7 +44,7 @@ ParseAction ParseAction::Accept() {
ParseAction ParseAction::Shift(ParseStateId state_index,
PrecedenceRange precedence_range) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0,
precedence_range, rules::AssociativityNone, -1);
precedence_range, rules::AssociativityNone, nullptr);
}
ParseAction ParseAction::ShiftExtra() {
@ -62,9 +63,9 @@ ParseAction ParseAction::ReduceExtra(Symbol symbol) {
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
int precedence,
rules::Associativity associativity,
unsigned int production_id) {
const Production &production) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
{ precedence, precedence }, associativity, production_id);
{ precedence, precedence }, associativity, &production);
}
bool ParseAction::operator==(const ParseAction &other) const {

View file

@ -9,6 +9,7 @@
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/precedence_range.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
@ -27,7 +28,7 @@ typedef enum {
class ParseAction {
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
PrecedenceRange range, rules::Associativity, int production_id);
PrecedenceRange range, rules::Associativity, const Production *);
public:
ParseAction();
@ -36,7 +37,7 @@ class ParseAction {
static ParseAction Shift(ParseStateId state_index, PrecedenceRange precedence);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
int precedence, rules::Associativity,
unsigned int production_id);
const Production &);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &) const;
@ -48,7 +49,7 @@ class ParseAction {
size_t consumed_symbol_count;
PrecedenceRange precedence_range;
rules::Associativity associativity;
int production_id;
const Production *production;
};
} // namespace tree_sitter
@ -65,7 +66,7 @@ struct hash<tree_sitter::ParseAction> {
hash<int>()(action.associativity) ^
hash<int>()(action.precedence_range.min) ^
hash<int>()(action.precedence_range.max) ^
hash<size_t>()(action.production_id));
hash<const void *>()(&action.production));
}
};
@ -91,7 +92,7 @@ class ParseTable {
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
std::set<std::pair<rules::Symbol, unsigned int>> fragile_production_ids;
std::set<const Production *> fragile_productions;
};
} // namespace tree_sitter

View file

@ -13,16 +13,7 @@ using std::pair;
using std::vector;
using std::set;
static const vector<Production> START_PRODUCTIONS_TOKEN_ONLY({
Production({ ProductionStep(rules::Symbol(0, true), 0,
rules::AssociativityNone) }),
});
static const vector<Production> START_PRODUCTIONS({
Production({ ProductionStep(rules::Symbol(0), 0, rules::AssociativityNone) }),
});
static const vector<Production> NO_PRODUCTIONS({});
static const vector<Production> NO_PRODUCTIONS;
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
const vector<Production> &productions)
@ -49,12 +40,7 @@ bool ProductionStep::operator==(const ProductionStep &other) const {
const vector<Production> &SyntaxGrammar::productions(
const rules::Symbol &symbol) const {
if (symbol == rules::START()) {
if (variables.empty())
return START_PRODUCTIONS_TOKEN_ONLY;
else
return START_PRODUCTIONS;
} else if (symbol.is_built_in() || symbol.is_token) {
if (symbol.is_built_in() || symbol.is_token) {
return NO_PRODUCTIONS;
} else {
return variables[symbol.index].productions;