In LR(1) items, only store consumed symbols as booleans

The booleans record whether each consumed symbol points to an auxiliary
token or not. This is all we need to know for the purpose of building
parse tables. Any other information just leads to redundant
parse states.
This commit is contained in:
Max Brunsfeld 2014-01-31 00:13:05 -08:00
parent 0d3a941848
commit 5ed5ae7514
11 changed files with 660 additions and 993 deletions

View file

@ -66,7 +66,7 @@ namespace tree_sitter {
return lhs_eq && rules_eq;
}
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, const vector<Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym) :
ParseItem::ParseItem(const Symbol &lhs, const rules::rule_ptr rule, const vector<bool> &consumed_symbols, const rules::Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_symbols(consumed_symbols),
lookahead_sym(lookahead_sym) {}

View file

@ -30,11 +30,11 @@ namespace tree_sitter {
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, const std::vector<rules::Symbol> &consumed_symbols, const rules::Symbol &lookahead_sym);
ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule, const std::vector<bool> &consumed_symbols, const rules::Symbol &lookahead_sym);
bool operator<(const ParseItem &other) const;
bool operator==(const ParseItem &other) const;
const std::vector<rules::Symbol> consumed_symbols;
const std::vector<bool> consumed_symbols;
const rules::Symbol lookahead_sym;
};
@ -60,10 +60,10 @@ namespace std {
struct hash<tree_sitter::build_tables::ParseItem> {
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
return
hash<tree_sitter::rules::Symbol>()(item.lhs) ^
hash<string>()(item.lhs.name) ^
hash<tree_sitter::rules::Rule>()(*item.rule) ^
hash<size_t>()(item.consumed_symbols.size()) ^
hash<tree_sitter::rules::Symbol>()(item.lookahead_sym);
hash<string>()(item.lookahead_sym.name);
}
};

View file

@ -44,7 +44,7 @@ namespace tree_sitter {
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
if (rule.get()) {
auto consumed_symbols = item.consumed_symbols;
consumed_symbols.push_back(*rule);
consumed_symbols.push_back(rule->is_auxiliary);
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));

View file

@ -61,20 +61,12 @@ namespace tree_sitter {
}
}
// Builds a vector<bool> holding the `is_auxiliary` flag of each symbol in
// `child_symbols`, in the same order as the input.
static vector<bool> reduce_flags(const vector<rules::Symbol> &child_symbols) {
    vector<bool> result;
    // The output has exactly one entry per input symbol, so size it up front.
    result.reserve(child_symbols.size());
    // Bind by const reference: only one bool is read from each element, so
    // copying the whole Symbol on every iteration would be wasted work.
    for (const auto &symbol : child_symbols) {
        result.push_back(symbol.is_auxiliary);
    }
    return result;
}
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs.name == START) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, reduce_flags(item.consumed_symbols));
ParseAction::Reduce(item.lhs, item.consumed_symbols);
parse_table.add_action(state_index, item.lookahead_sym, action);
}
}

View file

@ -49,19 +49,21 @@ namespace tree_sitter {
value.range.max_character + "'");
}
}
bool Character::operator==(const Rule &rule) const {
const Character *other = dynamic_cast<const Character *>(&rule);
if (!other) return false;
if (other->sign != sign) return false;
auto size = matches.size();
if (other->matches.size() != size) return false;
for (int i = 0; i < size; i++)
if (!(matches[i] == other->matches[i])) return false;
return true;
return other && this->operator==(*other);
}
// Equality between two Character rules.
// Returns true only when both have the same `sign`, the same number of
// entries in `matches`, and every pair of entries at the same index compares
// equal with operator==.
bool Character::operator==(const Character &other) const {
    if (other.sign != sign) return false;
    auto size = matches.size();
    if (other.matches.size() != size) return false;
    // The index uses the same type as `size`, avoiding the signed/unsigned
    // comparison an `int` counter would cause.
    for (decltype(size) i = 0; i < size; i++)
        if (!(matches[i] == other.matches[i])) return false;
    return true;
}
// Hash for a Character rule: XOR of a type-derived hash and the hash of the
// rule's string form.
size_t Character::hash_code() const {
    // `this` is a pointer, so typeid(this) names the pointer type itself.
    const size_t type_hash = typeid(this).hash_code();
    const size_t text_hash = hash<string>()(to_string());
    return type_hash ^ text_hash;
}

View file

@ -44,6 +44,7 @@ namespace tree_sitter {
Character(const std::vector<CharacterMatch> &matches, bool sign);
bool operator==(const Rule& other) const;
bool operator==(const Character& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;

View file

@ -11,9 +11,13 @@ namespace tree_sitter {
bool Symbol::operator==(const Rule &rule) const {
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
return other && (other->name == name) && (other->is_auxiliary == is_auxiliary);
return other && this->operator==(*other);
}
// Equality between two Symbols: both the name and the auxiliary flag must
// match.
bool Symbol::operator==(const Symbol &other) const {
    // Different names can never be equal, so bail out before checking the flag.
    if (!(other.name == name)) return false;
    return other.is_auxiliary == is_auxiliary;
}
// Hash for a Symbol: XOR of a type-derived hash, the hash of `name`, and the
// hash of `is_auxiliary`.
size_t Symbol::hash_code() const {
    // `this` is a pointer, so typeid(this) names the pointer type itself.
    const size_t type_hash = typeid(this).hash_code();
    const size_t name_hash = hash<string>()(name);
    const size_t flag_hash = hash<bool>()(is_auxiliary);
    return type_hash ^ name_hash ^ flag_hash;
}

View file

@ -11,6 +11,8 @@ namespace tree_sitter {
Symbol(const std::string &name, bool is_auxiliary);
bool operator==(const Rule& other) const;
bool operator==(const Symbol &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;