Fix conflation of finished items w/ different precedence
This commit is contained in:
parent
84fe01e145
commit
1983bcfb60
16 changed files with 19073 additions and 18250 deletions
|
|
@ -47,9 +47,14 @@ class ParseTableBuilder {
|
|||
conflict_manager(grammar) {}
|
||||
|
||||
pair<ParseTable, const GrammarError *> build() {
|
||||
Symbol start_symbol = Symbol(0, grammar.variables.empty());
|
||||
Production start_production({
|
||||
ProductionStep(start_symbol, 0, rules::AssociativityNone, -2),
|
||||
});
|
||||
|
||||
add_parse_state(ParseItemSet({
|
||||
{
|
||||
ParseItem(rules::START(), 0, 0, -2),
|
||||
ParseItem(rules::START(), start_production, 0),
|
||||
LookaheadSet({ rules::END_OF_INPUT() }),
|
||||
},
|
||||
}));
|
||||
|
|
@ -92,7 +97,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &transition : item_set.transitions(grammar)) {
|
||||
for (const auto &transition : item_set.transitions()) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &next_item_set = transition.second;
|
||||
|
||||
|
|
@ -113,12 +118,11 @@ class ParseTableBuilder {
|
|||
|
||||
CompletionStatus get_completion_status(const ParseItem &item) {
|
||||
CompletionStatus result = { false, 0, rules::AssociativityNone };
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size()) {
|
||||
if (item.step_index == item.production->size()) {
|
||||
result.is_done = true;
|
||||
if (item.step_index > 0) {
|
||||
const ProductionStep &last_step = production[item.step_index - 1];
|
||||
const ProductionStep &last_step =
|
||||
item.production->at(item.step_index - 1);
|
||||
result.precedence = last_step.precedence;
|
||||
result.associativity = last_step.associativity;
|
||||
}
|
||||
|
|
@ -139,7 +143,7 @@ class ParseTableBuilder {
|
|||
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
|
||||
completion_status.precedence,
|
||||
completion_status.associativity,
|
||||
item.production_index);
|
||||
*item.production);
|
||||
|
||||
for (const auto &lookahead_sym : *lookahead_symbols.entries)
|
||||
add_action(state_id, lookahead_sym, action, item_set);
|
||||
|
|
@ -200,9 +204,9 @@ class ParseTableBuilder {
|
|||
if (resolution.first)
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
if (old_action.type == ParseActionTypeReduce)
|
||||
parse_table.fragile_production_ids.insert(production_id(old_action));
|
||||
parse_table.fragile_productions.insert(old_action.production);
|
||||
if (new_action.type == ParseActionTypeReduce)
|
||||
parse_table.fragile_production_ids.insert(production_id(new_action));
|
||||
parse_table.fragile_productions.insert(new_action.production);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -216,10 +220,6 @@ class ParseTableBuilder {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
pair<Symbol, int> production_id(const ParseAction &action) {
|
||||
return { action.symbol, action.production_id };
|
||||
}
|
||||
|
||||
bool handle_unresolved_conflict(const ParseItemSet &item_set,
|
||||
const Symbol &lookahead) {
|
||||
set<Symbol> involved_symbols;
|
||||
|
|
@ -230,16 +230,14 @@ class ParseTableBuilder {
|
|||
for (const auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookahead_set = pair.second;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
|
||||
if (item.step_index == production.size()) {
|
||||
if (item.step_index == item.production->size()) {
|
||||
if (lookahead_set.contains(lookahead)) {
|
||||
involved_symbols.insert(item.lhs());
|
||||
reduce_items.insert(item);
|
||||
}
|
||||
} else {
|
||||
Symbol next_symbol = production[item.step_index].symbol;
|
||||
Symbol next_symbol = item.production->at(item.step_index).symbol;
|
||||
|
||||
if (item.step_index > 0) {
|
||||
set<Symbol> first_set = get_first_set(next_symbol);
|
||||
|
|
@ -284,8 +282,7 @@ class ParseTableBuilder {
|
|||
string item_string(const ParseItem &item) const {
|
||||
string result = symbol_name(item.lhs()) + " ->";
|
||||
size_t i = 0;
|
||||
for (const ProductionStep &step :
|
||||
grammar.productions(item.lhs())[item.production_index]) {
|
||||
for (const ProductionStep &step : *item.production) {
|
||||
if (i == item.step_index)
|
||||
result += " \u2022";
|
||||
result += " " + symbol_name(step.symbol);
|
||||
|
|
@ -303,12 +300,10 @@ class ParseTableBuilder {
|
|||
while (!symbols_to_process.empty()) {
|
||||
Symbol symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
if (result.insert(symbol).second) {
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
if (result.insert(symbol).second)
|
||||
for (const Production &production : grammar.productions(symbol))
|
||||
if (!production.empty())
|
||||
symbols_to_process.push_back({ production[0].symbol });
|
||||
}
|
||||
}
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
@ -318,10 +313,8 @@ class ParseTableBuilder {
|
|||
PrecedenceRange result;
|
||||
for (const auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index > 0)
|
||||
result.add(production[item.step_index - 1].precedence);
|
||||
result.add(item.production->at(item.step_index - 1).precedence);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,14 +34,12 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
|
|||
continue;
|
||||
|
||||
// If the item is at the end of its production, skip to the next item.
|
||||
const Production &item_production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == item_production.size())
|
||||
if (item.step_index == item.production->size())
|
||||
continue;
|
||||
|
||||
// If the next symbol in the production is not a non-terminal, skip to the
|
||||
// next item.
|
||||
Symbol next_symbol = item_production[item.step_index].symbol;
|
||||
Symbol next_symbol = item.production->at(item.step_index).symbol;
|
||||
if (next_symbol.is_token || next_symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
|
|
@ -52,33 +50,29 @@ ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
|
|||
// recursively-added follow symbols.
|
||||
LookaheadSet next_lookahead_symbols;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item_production.size()) {
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
|
||||
vector<Symbol> symbols_to_process(
|
||||
{ item.production->at(next_step).symbol });
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol following_symbol = symbols_to_process.back();
|
||||
Symbol symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
||||
if (!next_lookahead_symbols.insert(following_symbol))
|
||||
if (!next_lookahead_symbols.insert(symbol))
|
||||
continue;
|
||||
|
||||
for (const auto &production : grammar.productions(following_symbol))
|
||||
for (const Production &production : grammar.productions(symbol))
|
||||
if (!production.empty())
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
}
|
||||
|
||||
// Add each of the next symbol's productions to be processed recursively.
|
||||
size_t i = 0;
|
||||
for (const Production &production : grammar.productions(next_symbol)) {
|
||||
int rule_id = production.empty() ? 0 : production[0].rule_id;
|
||||
for (const Production &production : grammar.productions(next_symbol))
|
||||
items_to_process.push_back({
|
||||
ParseItem(next_symbol, i, 0, rule_id),
|
||||
next_lookahead_symbols,
|
||||
ParseItem(next_symbol, production, 0), next_lookahead_symbols,
|
||||
});
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -7,21 +7,22 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
|
||||
unsigned int step_index, int rule_id)
|
||||
ParseItem::ParseItem(const Symbol &lhs, const Production &production,
|
||||
unsigned int step_index)
|
||||
: variable_index(lhs.index),
|
||||
production_index(production_index),
|
||||
step_index(step_index),
|
||||
rule_id(rule_id) {}
|
||||
production(&production),
|
||||
step_index(step_index) {}
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
return (variable_index == other.variable_index) &&
|
||||
(rule_id == other.rule_id) && (step_index == other.step_index);
|
||||
return ((variable_index == other.variable_index) &&
|
||||
(step_index == other.step_index) &&
|
||||
(remaining_rule_id() == other.remaining_rule_id()));
|
||||
}
|
||||
|
||||
bool ParseItem::operator<(const ParseItem &other) const {
|
||||
|
|
@ -33,13 +34,31 @@ bool ParseItem::operator<(const ParseItem &other) const {
|
|||
return true;
|
||||
if (step_index > other.step_index)
|
||||
return false;
|
||||
return rule_id < other.rule_id;
|
||||
return remaining_rule_id() < other.remaining_rule_id();
|
||||
}
|
||||
|
||||
Symbol ParseItem::lhs() const {
|
||||
return Symbol(variable_index);
|
||||
}
|
||||
|
||||
pair<int, int> ParseItem::remaining_rule_id() const {
|
||||
if (production->empty())
|
||||
return { -2, -1 };
|
||||
else if (step_index < production->size())
|
||||
return { -1, production->at(step_index).rule_id };
|
||||
else
|
||||
return { production->back().associativity, production->back().precedence };
|
||||
}
|
||||
|
||||
size_t ParseItem::Hash::operator()(const ParseItem &item) const {
|
||||
size_t result = hash<int>()(item.variable_index);
|
||||
result ^= hash<unsigned int>()(item.step_index);
|
||||
result ^= hash<size_t>()(item.production->size());
|
||||
pair<int, int> id = item.remaining_rule_id();
|
||||
result ^= hash<int>()(id.first) ^ hash<int>()(id.second);
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseItemSet::ParseItemSet() {}
|
||||
|
||||
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
|
||||
|
|
@ -53,33 +72,27 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
|
|||
size_t result = hash<size_t>()(item_set.entries.size());
|
||||
for (auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
result ^= hash<unsigned int>()(item.variable_index) ^
|
||||
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
|
||||
result ^= ParseItem::Hash()(item);
|
||||
|
||||
const LookaheadSet &lookahead_set = pair.second;
|
||||
result ^= hash<size_t>()(lookahead_set.entries->size());
|
||||
for (auto &symbol : *pair.second.entries) {
|
||||
for (auto &symbol : *pair.second.entries)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet> ParseItemSet::transitions(
|
||||
const SyntaxGrammar &grammar) const {
|
||||
map<Symbol, ParseItemSet> ParseItemSet::transitions() const {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const auto &pair : entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookahead_symbols = pair.second;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size())
|
||||
if (item.step_index == item.production->size())
|
||||
continue;
|
||||
|
||||
size_t step = item.step_index + 1;
|
||||
Symbol symbol = production[item.step_index].symbol;
|
||||
int rule_id = step < production.size() ? production[step].rule_id : 0;
|
||||
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
|
||||
Symbol symbol = item.production->at(item.step_index).symbol;
|
||||
ParseItem new_item(item.lhs(), *item.production, step);
|
||||
|
||||
result[symbol].entries[new_item] = lookahead_symbols;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
|
@ -11,16 +12,20 @@ namespace build_tables {
|
|||
|
||||
class ParseItem {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
|
||||
ParseItem(const rules::Symbol &, const Production &, unsigned int);
|
||||
|
||||
bool operator==(const ParseItem &other) const;
|
||||
bool operator<(const ParseItem &other) const;
|
||||
rules::Symbol lhs() const;
|
||||
std::pair<int, int> remaining_rule_id() const;
|
||||
|
||||
int variable_index;
|
||||
unsigned int production_index;
|
||||
const Production *production;
|
||||
unsigned int step_index;
|
||||
int rule_id;
|
||||
|
||||
struct Hash {
|
||||
size_t operator()(const ParseItem &) const;
|
||||
};
|
||||
};
|
||||
|
||||
class ParseItemSet {
|
||||
|
|
@ -28,7 +33,7 @@ class ParseItemSet {
|
|||
ParseItemSet();
|
||||
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
|
||||
|
||||
std::map<rules::Symbol, ParseItemSet> transitions(const SyntaxGrammar &) const;
|
||||
std::map<rules::Symbol, ParseItemSet> transitions() const;
|
||||
bool operator==(const ParseItemSet &) const;
|
||||
|
||||
std::map<ParseItem, LookaheadSet> entries;
|
||||
|
|
|
|||
|
|
@ -380,9 +380,8 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
bool reduce_action_is_fragile(const ParseAction &action) const {
|
||||
return parse_table.fragile_production_ids.find(
|
||||
{ action.symbol, action.production_id }) !=
|
||||
parse_table.fragile_production_ids.end();
|
||||
return parse_table.fragile_productions.find(action.production) !=
|
||||
parse_table.fragile_productions.end();
|
||||
}
|
||||
|
||||
// C-code generation functions
|
||||
|
|
|
|||
|
|
@ -14,14 +14,15 @@ using rules::Symbol;
|
|||
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
Symbol symbol, size_t consumed_symbol_count,
|
||||
PrecedenceRange precedence_range,
|
||||
rules::Associativity associativity, int production_id)
|
||||
rules::Associativity associativity,
|
||||
const Production *production)
|
||||
: type(type),
|
||||
symbol(symbol),
|
||||
state_index(state_index),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
precedence_range(precedence_range),
|
||||
associativity(associativity),
|
||||
production_id(production_id) {}
|
||||
production(production) {}
|
||||
|
||||
ParseAction::ParseAction()
|
||||
: type(ParseActionTypeError),
|
||||
|
|
@ -43,7 +44,7 @@ ParseAction ParseAction::Accept() {
|
|||
ParseAction ParseAction::Shift(ParseStateId state_index,
|
||||
PrecedenceRange precedence_range) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0,
|
||||
precedence_range, rules::AssociativityNone, -1);
|
||||
precedence_range, rules::AssociativityNone, nullptr);
|
||||
}
|
||||
|
||||
ParseAction ParseAction::ShiftExtra() {
|
||||
|
|
@ -62,9 +63,9 @@ ParseAction ParseAction::ReduceExtra(Symbol symbol) {
|
|||
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
|
||||
int precedence,
|
||||
rules::Associativity associativity,
|
||||
unsigned int production_id) {
|
||||
const Production &production) {
|
||||
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
|
||||
{ precedence, precedence }, associativity, production_id);
|
||||
{ precedence, precedence }, associativity, &production);
|
||||
}
|
||||
|
||||
bool ParseAction::operator==(const ParseAction &other) const {
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/precedence_range.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -27,7 +28,7 @@ typedef enum {
|
|||
class ParseAction {
|
||||
ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
PrecedenceRange range, rules::Associativity, int production_id);
|
||||
PrecedenceRange range, rules::Associativity, const Production *);
|
||||
|
||||
public:
|
||||
ParseAction();
|
||||
|
|
@ -36,7 +37,7 @@ class ParseAction {
|
|||
static ParseAction Shift(ParseStateId state_index, PrecedenceRange precedence);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
int precedence, rules::Associativity,
|
||||
unsigned int production_id);
|
||||
const Production &);
|
||||
static ParseAction ShiftExtra();
|
||||
static ParseAction ReduceExtra(rules::Symbol symbol);
|
||||
bool operator==(const ParseAction &) const;
|
||||
|
|
@ -48,7 +49,7 @@ class ParseAction {
|
|||
size_t consumed_symbol_count;
|
||||
PrecedenceRange precedence_range;
|
||||
rules::Associativity associativity;
|
||||
int production_id;
|
||||
const Production *production;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -65,7 +66,7 @@ struct hash<tree_sitter::ParseAction> {
|
|||
hash<int>()(action.associativity) ^
|
||||
hash<int>()(action.precedence_range.min) ^
|
||||
hash<int>()(action.precedence_range.max) ^
|
||||
hash<size_t>()(action.production_id));
|
||||
hash<const void *>()(&action.production));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -91,7 +92,7 @@ class ParseTable {
|
|||
|
||||
std::vector<ParseState> states;
|
||||
std::set<rules::Symbol> symbols;
|
||||
std::set<std::pair<rules::Symbol, unsigned int>> fragile_production_ids;
|
||||
std::set<const Production *> fragile_productions;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -13,16 +13,7 @@ using std::pair;
|
|||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
static const vector<Production> START_PRODUCTIONS_TOKEN_ONLY({
|
||||
Production({ ProductionStep(rules::Symbol(0, true), 0,
|
||||
rules::AssociativityNone) }),
|
||||
});
|
||||
|
||||
static const vector<Production> START_PRODUCTIONS({
|
||||
Production({ ProductionStep(rules::Symbol(0), 0, rules::AssociativityNone) }),
|
||||
});
|
||||
|
||||
static const vector<Production> NO_PRODUCTIONS({});
|
||||
static const vector<Production> NO_PRODUCTIONS;
|
||||
|
||||
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
|
||||
const vector<Production> &productions)
|
||||
|
|
@ -49,12 +40,7 @@ bool ProductionStep::operator==(const ProductionStep &other) const {
|
|||
|
||||
const vector<Production> &SyntaxGrammar::productions(
|
||||
const rules::Symbol &symbol) const {
|
||||
if (symbol == rules::START()) {
|
||||
if (variables.empty())
|
||||
return START_PRODUCTIONS_TOKEN_ONLY;
|
||||
else
|
||||
return START_PRODUCTIONS;
|
||||
} else if (symbol.is_built_in() || symbol.is_token) {
|
||||
if (symbol.is_built_in() || symbol.is_token) {
|
||||
return NO_PRODUCTIONS;
|
||||
} else {
|
||||
return variables[symbol.index].productions;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue