Avoid unnecessary std::map construction when merging transition sets

This commit is contained in:
Max Brunsfeld 2014-06-26 13:42:42 -07:00
parent 9686c57e90
commit 9bad5dff3e
4 changed files with 55 additions and 74 deletions

View file

@ -9,58 +9,49 @@ START_TEST
describe("merging character set transitions", []() {
typedef map<CharacterSet, int> int_map;
auto merge_result = [&](int_map left, int_map right) -> int_map {
merge_char_transitions<int>(&left, right, [](int *l, const int *r) {
auto do_merge = [&](int_map *left, const pair<CharacterSet, int> &new_pair) {
merge_char_transitions<int>(left, new_pair, [](int *l, const int *r) {
*l = *l | *r;
});
return left;
};
describe("when none of the transitions intersect", [&]() {
it("returns the union of the two sets of transitions", [&]() {
int_map map1({
int_map map({
{ CharacterSet({ 'a', 'c' }), 1 },
{ CharacterSet({ 'x', 'y' }), 2 },
{ CharacterSet({ '1', '9' }), 4 },
});
int_map map2({
{ CharacterSet({ ' ' }), 8 },
{ CharacterSet({ '\t' }), 16 },
});
do_merge(&map, { CharacterSet({ ' ' }), 8 });
do_merge(&map, { CharacterSet({ '\t' }), 16 });
AssertThat(merge_result(map1, map2), Equals(int_map({
AssertThat(map, Equals(int_map({
{ CharacterSet({ 'a', 'c' }), 1 },
{ CharacterSet({ 'x', 'y' }), 2 },
{ CharacterSet({ '1', '9' }), 4 },
{ CharacterSet({ ' ' }), 8 },
{ CharacterSet({ '\t' }), 16 },
})));
AssertThat(merge_result(map2, map1), Equals(merge_result(map1, map2)));
});
});
describe("when transitions intersect", [&]() {
it("merges the intersecting transitions using the provided function", [&]() {
int_map map1({
int_map map({
{ CharacterSet({ {'a', 'f'}, {'A', 'F'} }), 1 },
{ CharacterSet({ {'0', '9'} }), 2 },
});
int_map map2({
{ CharacterSet({ 'c' }), 4 },
{ CharacterSet({ '3' }), 8 },
});
do_merge(&map, { CharacterSet({ 'c' }), 4 });
do_merge(&map, { CharacterSet({ '3' }), 8 });
AssertThat(merge_result(map1, map2), Equals(int_map({
AssertThat(map, Equals(int_map({
{ CharacterSet({ {'a', 'b'}, {'d', 'f'}, {'A', 'F'} }), 1 },
{ CharacterSet({ {'c'} }), 5 },
{ CharacterSet({ {'0', '2'}, {'4', '9'} }), 2 },
{ CharacterSet({ '3' }), 10 },
})));
AssertThat(merge_result(map2, map1), Equals(merge_result(map1, map2)));
});
});
@ -70,17 +61,13 @@ describe("merging character set transitions", []() {
{ CharacterSet({ 'a', 'c' }), 1 },
});
int_map map2({
{ CharacterSet({ 'a' }), 2 },
{ CharacterSet({ 'c' }), 4 },
});
do_merge(&map1, { CharacterSet({ 'a' }), 2 });
do_merge(&map1, { CharacterSet({ 'c' }), 4 });
AssertThat(merge_result(map1, map2), Equals(int_map({
AssertThat(map1, Equals(int_map({
{ CharacterSet({ 'a' }), 3 },
{ CharacterSet({ 'c' }), 5 },
})));
AssertThat(merge_result(map2, map1), Equals(merge_result(map1, map2)));
});
});
});

View file

@ -21,7 +21,7 @@ namespace tree_sitter {
const set<Symbol> &lookahead_symbols = pair.second;
for (auto &transition : sym_transitions(item.rule)) {
ParseItem new_item(item.lhs, transition.second, item.consumed_symbol_count + 1);
merge_sym_transitions<ParseItemSet>(&result, {{ transition.first, item_set_closure(new_item, lookahead_symbols, grammar) }},
merge_sym_transitions<ParseItemSet>(&result, { transition.first, item_set_closure(new_item, lookahead_symbols, grammar) },
[](ParseItemSet *left, const ParseItemSet *right) {
for (auto &pair : *right)
left->operator[](pair.first).insert(pair.second.begin(), pair.second.end());
@ -37,7 +37,7 @@ namespace tree_sitter {
for (const LexItem &item : item_set) {
for (auto &transition : char_transitions(item.rule)) {
LexItem next_item(item.lhs, transition.second);
merge_char_transitions<LexItemSet>(&result, {{ transition.first, LexItemSet({ next_item }) }},
merge_char_transitions<LexItemSet>(&result, { transition.first, LexItemSet({ next_item }) },
[](LexItemSet *left, const LexItemSet *right) {
left->insert(right->begin(), right->end());
});

View file

@ -16,24 +16,18 @@ namespace tree_sitter {
*/
template<typename T>
void merge_sym_transitions(std::map<rules::Symbol, T> *left,
const std::map<rules::Symbol, T> &right,
const std::pair<rules::Symbol, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
for (auto &pair : right) {
auto rule = pair.first;
bool merged = false;
for (auto &existing_pair : *left) {
auto existing_rule = existing_pair.first;
if (existing_rule == rule) {
merge_fn(&existing_pair.second, &pair.second);
merged = true;
break;
} else if (rule < existing_rule) {
break;
}
auto new_symbol = new_pair.first;
for (auto &existing_pair : *left) {
auto existing_symbol = existing_pair.first;
if (new_symbol < existing_symbol) break;
if (existing_symbol == new_symbol) {
merge_fn(&existing_pair.second, &new_pair.second);
return;
}
if (!merged)
left->insert({ pair.first, pair.second });
}
left->insert(new_pair);
}
/*
@ -44,37 +38,35 @@ namespace tree_sitter {
*/
template<typename T>
void merge_char_transitions(std::map<rules::CharacterSet, T> *left,
const std::map<rules::CharacterSet, T> &right,
const std::pair<rules::CharacterSet, T> &new_pair,
std::function<void(T *, const T *)> merge_fn) {
for (auto &new_pair : right) {
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
std::map<rules::CharacterSet, T> pairs_to_insert;
std::map<rules::CharacterSet, T> pairs_to_insert;
auto iter = left->begin();
while (iter != left->end()) {
rules::CharacterSet char_set = iter->first;
T value = iter->second;
auto iter = left->begin();
while (iter != left->end()) {
rules::CharacterSet char_set = iter->first;
T value = iter->second;
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
merge_fn(&value, &new_value);
pairs_to_insert.insert({ intersection, value });
left->erase(iter++);
} else {
++iter;
}
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
merge_fn(&value, &new_value);
pairs_to_insert.insert({ intersection, value });
left->erase(iter++);
} else {
++iter;
}
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
if (!new_char_set.is_empty())
left->insert({ new_char_set, new_pair.second });
}
left->insert(pairs_to_insert.begin(), pairs_to_insert.end());
if (!new_char_set.is_empty())
left->insert({ new_char_set, new_pair.second });
}
}
}

View file

@ -25,16 +25,18 @@ namespace tree_sitter {
template<>
void merge_transitions(map<CharacterSet, rule_ptr> *left, const map<CharacterSet, rule_ptr> &right) {
merge_char_transitions<rule_ptr>(left, right, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
for (auto &pair : right)
merge_char_transitions<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
}
template<>
void merge_transitions(map<Symbol, rule_ptr> *left, const map<Symbol, rule_ptr> &right) {
merge_sym_transitions<rule_ptr>(left, right, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
for (auto &pair : right)
merge_sym_transitions<rule_ptr>(left, pair, [](rule_ptr *left, const rule_ptr *right) {
*left = rules::Choice::Build({ *left, *right });
});
}
template<typename T>