Simplify lex item set transitions code

This commit is contained in:
Max Brunsfeld 2015-10-10 12:33:12 -07:00
parent 25791085c3
commit db9966b57c
13 changed files with 378 additions and 428 deletions

View file

@ -130,8 +130,8 @@ class LexTableBuilder {
return rules::Seq::build({
make_shared<rules::Metadata>(
separator_rule, map<rules::MetadataKey, int>({
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
})),
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
})),
rule,
});
}

View file

@ -3,8 +3,6 @@
#include <vector>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {

View file

@ -1,7 +1,6 @@
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/build_tables/lex_item_transitions.h"
#include "compiler/rules/symbol.h"
#include <unordered_set>
@ -48,16 +47,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
map<CharacterSet, LexItemSet> result;
for (const LexItem &item : entries) {
for (auto &transition : rule_transitions(item.rule)) {
LexItem next_item(item.lhs, transition.second);
merge_transition<LexItemSet>(
&result, { transition.first, LexItemSet({ next_item }) },
[](LexItemSet *left, const LexItemSet *right) {
left->entries.insert(right->entries.begin(), right->entries.end());
});
}
}
for (const LexItem &item : entries)
lex_item_transitions(&result, item);
return result;
}

View file

@ -0,0 +1,127 @@
#include "compiler/build_tables/lex_item_transitions.h"
#include <map>
#include <vector>
#include <functional>
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/visitor.h"
#include "compiler/build_tables/lex_item.h"
namespace tree_sitter {
namespace build_tables {
using std::function;
using std::make_shared;
using std::map;
using std::pair;
using std::vector;
using rules::CharacterSet;
// Rule visitor that collects the outgoing transitions of a single lex item.
// Visiting a rule adds (character set -> successor item set) entries to the
// map handed to the constructor. Every successor item created here carries
// the left-hand-side symbol that was passed to the constructor.
class LexItemTransitions : public rules::RuleFn<void> {
  map<CharacterSet, LexItemSet> *transitions;
  const rules::Symbol &item_lhs;

  // Builds a new item set in which each item of `item_set` keeps its lhs but
  // has its rule replaced by `transform(rule)`.
  LexItemSet transform_item_set(const LexItemSet &item_set,
                                function<rule_ptr(rule_ptr)> transform) {
    LexItemSet rewritten;
    for (const LexItem &entry : item_set.entries) {
      rewritten.entries.insert(LexItem(entry.lhs, transform(entry.rule)));
    }
    return rewritten;
  }

  // Merges the mapping `new_char_set -> new_item_set` into `*transitions`.
  //
  // Each existing key that shares characters with `new_char_set` is replaced
  // by up to two entries: its non-shared remainder (with its original items)
  // and the shared part (with the original items plus `new_item_set`'s).
  // Whatever is left of `new_char_set` after all overlaps have been taken out
  // is inserted with `new_item_set` alone.
  //
  // NOTE(review): this relies on CharacterSet::remove_set stripping the
  // argument's characters from the receiver and returning the stripped part;
  // that is how the result is used below -- confirm against CharacterSet.
  void merge_transition(map<CharacterSet, LexItemSet> *transitions,
                        CharacterSet new_char_set, LexItemSet new_item_set) {
    vector<pair<CharacterSet, LexItemSet>> pending;

    for (auto it = transitions->begin(); it != transitions->end();) {
      CharacterSet remainder = it->first;
      LexItemSet &current_items = it->second;
      CharacterSet overlap = remainder.remove_set(new_char_set);

      if (overlap.is_empty()) {
        ++it;
        continue;
      }

      new_char_set.remove_set(overlap);

      // Snapshot the untouched items for the remainder *before* the new
      // items are merged into the existing set.
      if (!remainder.is_empty())
        pending.push_back({ remainder, current_items });

      current_items.entries.insert(new_item_set.entries.begin(),
                                   new_item_set.entries.end());
      pending.push_back({ overlap, current_items });

      it = transitions->erase(it);
    }

    transitions->insert(pending.begin(), pending.end());

    if (!new_char_set.is_empty())
      transitions->insert({ new_char_set, new_item_set });
  }

  // Computes the transitions of `inner` with a separate visitor, rewrites the
  // rule of every successor item with `wrap`, and merges the outcome into
  // this visitor's map.
  void merge_wrapped_transitions(const rule_ptr &inner,
                                 function<rule_ptr(rule_ptr)> wrap) {
    map<CharacterSet, LexItemSet> inner_transitions;
    LexItemTransitions(&inner_transitions, item_lhs).apply(inner);

    for (auto &entry : inner_transitions) {
      merge_transition(transitions, entry.first,
                       transform_item_set(entry.second, wrap));
    }
  }

  // A character set transitions on itself to an item whose rule is Blank.
  void apply_to(const CharacterSet *rule) {
    LexItem blank_item(item_lhs, rules::Blank::build());
    merge_transition(transitions, *rule, LexItemSet({ blank_item }));
  }

  // A choice contributes the transitions of each of its elements.
  void apply_to(const rules::Choice *rule) {
    for (const rule_ptr &alternative : rule->elements) {
      apply(alternative);
    }
  }

  // A sequence contributes the transitions of its left side, each followed by
  // the right side; if the left side can be blank, the right side's own
  // transitions are added as well.
  void apply_to(const rules::Seq *rule) {
    merge_wrapped_transitions(rule->left, [rule](rule_ptr item_rule) {
      return rules::Seq::build({ item_rule, rule->right });
    });

    if (rule_can_be_blank(rule->left)) {
      apply(rule->right);
    }
  }

  // A repeat contributes the transitions of its content, each followed by a
  // copy of the repeat itself.
  void apply_to(const rules::Repeat *rule) {
    merge_wrapped_transitions(rule->content, [rule](rule_ptr item_rule) {
      return rules::Seq::build({ item_rule, rule->copy() });
    });
  }

  // Metadata contributes the transitions of the wrapped rule, with each
  // successor rule re-wrapped in the same metadata value.
  void apply_to(const rules::Metadata *rule) {
    merge_wrapped_transitions(rule->rule, [rule](rule_ptr item_rule) {
      return make_shared<rules::Metadata>(item_rule, rule->value);
    });
  }

 public:
  LexItemTransitions(map<CharacterSet, LexItemSet> *transitions,
                     const rules::Symbol &item_lhs)
      : transitions(transitions), item_lhs(item_lhs) {}
};
// Adds the transitions of `item` to `*transitions` by running a
// LexItemTransitions visitor, seeded with the item's lhs, over the item's
// rule.
void lex_item_transitions(map<CharacterSet, LexItemSet> *transitions,
                          const LexItem &item) {
  LexItemTransitions visitor(transitions, item.lhs);
  visitor.apply(item.rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -2,13 +2,16 @@
#define COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_
#include <map>
#include <set>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/build_tables/lex_item.h"
namespace tree_sitter {
namespace build_tables {
// Returns the transitions of the given rule as a map from character set to
// rule.
std::map<rules::CharacterSet, rule_ptr> rule_transitions(const rule_ptr &);
// Merges the transitions of the given lex item into `*transitions`, a map
// from character set to the item set reached on that character set.
void lex_item_transitions(std::map<rules::CharacterSet, LexItemSet> *transitions,
const LexItem &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,54 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
#define COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_
#include <map>
#include <utility>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace build_tables {
/*
 * Merges one (character set, value) pair into a transition map keyed by
 * character set.
 *
 * Every existing key that shares characters with the new character set is
 * replaced by up to two entries: the part not shared (with a copy of the old
 * value) and the shared part (with the old value after `merge_fn` has
 * combined the new value into it). Characters of the new set that matched no
 * existing key are inserted with the new value unchanged.
 *
 * NOTE(review): relies on CharacterSet::remove_set stripping the argument's
 * characters from the receiver and returning the stripped part -- confirm
 * against CharacterSet.
 */
template <typename T>
void merge_transition(std::map<rules::CharacterSet, T> *left,
                      const std::pair<rules::CharacterSet, T> &new_pair,
                      std::function<void(T *, const T *)> merge_fn) {
  rules::CharacterSet unmatched_chars = new_pair.first;
  T incoming_value = new_pair.second;
  std::map<rules::CharacterSet, T> replacements;

  for (auto it = left->begin(); it != left->end();) {
    rules::CharacterSet remainder = it->first;
    T existing_value = it->second;
    rules::CharacterSet overlap = remainder.remove_set(unmatched_chars);

    if (overlap.is_empty()) {
      ++it;
      continue;
    }

    unmatched_chars.remove_set(overlap);

    // The remainder keeps the value as it was before merging.
    if (!remainder.is_empty())
      replacements.insert({ remainder, existing_value });

    merge_fn(&existing_value, &incoming_value);
    replacements.insert({ overlap, existing_value });

    it = left->erase(it);
  }

  left->insert(replacements.begin(), replacements.end());

  if (!unmatched_chars.is_empty())
    left->insert({ unmatched_chars, incoming_value });
}
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_

View file

@ -1,74 +0,0 @@
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace build_tables {
using std::map;
using std::make_shared;
using rules::CharacterSet;
using rules::Choice;
using rules::Symbol;
// Rule visitor that produces, for a rule, a map from character set to the
// rule associated with a transition on that character set.
class RuleTransitions : public rules::RuleFn<map<CharacterSet, rule_ptr>> {
 private:
  // Folds every entry of `right` into `*left`. Where an incoming entry
  // overlaps an existing one, the two rules are combined into a Choice.
  void merge_transitions(map<CharacterSet, rule_ptr> *left,
                         const map<CharacterSet, rule_ptr> &right) {
    auto combine_as_choice = [](rule_ptr *existing, const rule_ptr *incoming) {
      *existing = Choice::build({ *existing, *incoming });
    };

    for (auto &entry : right) {
      merge_transition<rule_ptr>(left, entry, combine_as_choice);
    }
  }

  // A character set has a single transition, on itself, to a Blank rule.
  map<CharacterSet, rule_ptr> apply_to(const CharacterSet *rule) {
    map<CharacterSet, rule_ptr> single;
    single.insert({ *rule, make_shared<rules::Blank>() });
    return single;
  }

  // A choice has the merged transitions of all of its elements.
  map<CharacterSet, rule_ptr> apply_to(const rules::Choice *rule) {
    map<CharacterSet, rule_ptr> combined;
    for (const auto &alternative : rule->elements) {
      merge_transitions(&combined, this->apply(alternative));
    }
    return combined;
  }

  // A sequence has the transitions of its left side, each followed by the
  // right side; if the left side can be blank, the right side's transitions
  // are merged in as well.
  map<CharacterSet, rule_ptr> apply_to(const rules::Seq *rule) {
    map<CharacterSet, rule_ptr> transitions = this->apply(rule->left);

    for (auto &entry : transitions) {
      entry.second = rules::Seq::build({ entry.second, rule->right });
    }

    if (rule_can_be_blank(rule->left)) {
      merge_transitions(&transitions, this->apply(rule->right));
    }

    return transitions;
  }

  // A repeat has the transitions of its content, each followed by a copy of
  // the repeat itself.
  map<CharacterSet, rule_ptr> apply_to(const rules::Repeat *rule) {
    map<CharacterSet, rule_ptr> transitions = this->apply(rule->content);

    for (auto &entry : transitions) {
      entry.second = rules::Seq::build({ entry.second, rule->copy() });
    }

    return transitions;
  }

  // Metadata has the transitions of the wrapped rule, with each target rule
  // re-wrapped in the same metadata value.
  map<CharacterSet, rule_ptr> apply_to(const rules::Metadata *rule) {
    map<CharacterSet, rule_ptr> transitions = this->apply(rule->rule);

    for (auto &entry : transitions) {
      entry.second = make_shared<rules::Metadata>(entry.second, rule->value);
    }

    return transitions;
  }
};
// Returns the transition map that a RuleTransitions visitor produces for
// `rule`.
map<CharacterSet, rule_ptr> rule_transitions(const rule_ptr &rule) {
  map<CharacterSet, rule_ptr> transitions = RuleTransitions().apply(rule);
  return transitions;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -27,7 +27,8 @@ LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 });
}
LexAction LexAction::Advance(size_t state_index, PrecedenceRange precedence_range) {
LexAction LexAction::Advance(size_t state_index,
PrecedenceRange precedence_range) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
precedence_range);
}

View file

@ -40,7 +40,8 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
if (error)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
return make_tuple(flatten_grammar(syntax_grammar), normalize_rules(lex_grammar), nullptr);
return make_tuple(flatten_grammar(syntax_grammar),
normalize_rules(lex_grammar), nullptr);
}
} // namespace prepare_grammar