From 9bad5dff3e296a66ee2727349e95c0a259085e34 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 26 Jun 2014 13:42:42 -0700 Subject: [PATCH] Avoid unnecessary std::map construction when merging transition sets --- .../build_tables/merge_transitions_spec.cc | 39 ++++------ .../build_tables/item_set_transitions.cc | 4 +- src/compiler/build_tables/merge_transitions.h | 72 +++++++++---------- src/compiler/build_tables/rule_transitions.cc | 14 ++-- 4 files changed, 55 insertions(+), 74 deletions(-) diff --git a/spec/compiler/build_tables/merge_transitions_spec.cc b/spec/compiler/build_tables/merge_transitions_spec.cc index e6ca51fe..0e57dc9d 100644 --- a/spec/compiler/build_tables/merge_transitions_spec.cc +++ b/spec/compiler/build_tables/merge_transitions_spec.cc @@ -9,58 +9,49 @@ START_TEST describe("merging character set transitions", []() { typedef map int_map; - auto merge_result = [&](int_map left, int_map right) -> int_map { - merge_char_transitions(&left, right, [](int *l, const int *r) { + auto do_merge = [&](int_map *left, const pair &new_pair) { + merge_char_transitions(left, new_pair, [](int *l, const int *r) { *l = *l | *r; }); - return left; }; describe("when none of the transitions intersect", [&]() { it("returns the union of the two sets of transitions", [&]() { - int_map map1({ + int_map map({ { CharacterSet({ 'a', 'c' }), 1 }, { CharacterSet({ 'x', 'y' }), 2 }, { CharacterSet({ '1', '9' }), 4 }, }); - int_map map2({ - { CharacterSet({ ' ' }), 8 }, - { CharacterSet({ '\t' }), 16 }, - }); + do_merge(&map, { CharacterSet({ ' ' }), 8 }); + do_merge(&map, { CharacterSet({ '\t' }), 16 }); - AssertThat(merge_result(map1, map2), Equals(int_map({ + AssertThat(map, Equals(int_map({ { CharacterSet({ 'a', 'c' }), 1 }, { CharacterSet({ 'x', 'y' }), 2 }, { CharacterSet({ '1', '9' }), 4 }, { CharacterSet({ ' ' }), 8 }, { CharacterSet({ '\t' }), 16 }, }))); - - AssertThat(merge_result(map2, map1), Equals(merge_result(map1, map2))); }); }); describe("when transitions intersect", [&]() { it("merges the intersecting transitions using the provided function", [&]() { - int_map map1({ + int_map map({ { CharacterSet({ {'a', 'f'}, {'A', 'F'} }), 1 }, { CharacterSet({ {'0', '9'} }), 2 }, }); - int_map map2({ - { CharacterSet({ 'c' }), 4 }, - { CharacterSet({ '3' }), 8 }, - }); + do_merge(&map, { CharacterSet({ 'c' }), 4 }); + do_merge(&map, { CharacterSet({ '3' }), 8 }); - AssertThat(merge_result(map1, map2), Equals(int_map({ + AssertThat(map, Equals(int_map({ { CharacterSet({ {'a', 'b'}, {'d', 'f'}, {'A', 'F'} }), 1 }, { CharacterSet({ {'c'} }), 5 }, { CharacterSet({ {'0', '2'}, {'4', '9'} }), 2 }, { CharacterSet({ '3' }), 10 }, }))); - - AssertThat(merge_result(map2, map1), Equals(merge_result(map1, map2))); }); }); @@ -70,17 +61,13 @@ describe("merging character set transitions", []() { { CharacterSet({ 'a', 'c' }), 1 }, }); - int_map map2({ - { CharacterSet({ 'a' }), 2 }, - { CharacterSet({ 'c' }), 4 }, - }); + do_merge(&map1, { CharacterSet({ 'a' }), 2 }); + do_merge(&map1, { CharacterSet({ 'c' }), 4 }); - AssertThat(merge_result(map1, map2), Equals(int_map({ + AssertThat(map1, Equals(int_map({ { CharacterSet({ 'a' }), 3 }, { CharacterSet({ 'c' }), 5 }, }))); - - AssertThat(merge_result(map2, map1), Equals(merge_result(map1, map2))); }); }); }); diff --git a/src/compiler/build_tables/item_set_transitions.cc b/src/compiler/build_tables/item_set_transitions.cc index 9c5ab986..ef39caa7 100644 --- a/src/compiler/build_tables/item_set_transitions.cc +++ b/src/compiler/build_tables/item_set_transitions.cc @@ -21,7 +21,7 @@ namespace tree_sitter { const set &lookahead_symbols = pair.second; for (auto &transition : sym_transitions(item.rule)) { ParseItem new_item(item.lhs, transition.second, item.consumed_symbol_count + 1); - merge_sym_transitions(&result, {{ transition.first, item_set_closure(new_item, lookahead_symbols, grammar) }}, + merge_sym_transitions(&result, { transition.first, item_set_closure(new_item, lookahead_symbols, grammar) }, [](ParseItemSet *left, const ParseItemSet *right) { for (auto &pair : *right) left->operator[](pair.first).insert(pair.second.begin(), pair.second.end()); @@ -37,7 +37,7 @@ namespace tree_sitter { for (const LexItem &item : item_set) { for (auto &transition : char_transitions(item.rule)) { LexItem next_item(item.lhs, transition.second); - merge_char_transitions(&result, {{ transition.first, LexItemSet({ next_item }) }}, + merge_char_transitions(&result, { transition.first, LexItemSet({ next_item }) }, [](LexItemSet *left, const LexItemSet *right) { left->insert(right->begin(), right->end()); }); diff --git a/src/compiler/build_tables/merge_transitions.h b/src/compiler/build_tables/merge_transitions.h index 3a5ffbca..1a28cee8 100644 --- a/src/compiler/build_tables/merge_transitions.h +++ b/src/compiler/build_tables/merge_transitions.h @@ -16,24 +16,18 @@ namespace tree_sitter { */ template void merge_sym_transitions(std::map *left, - const std::map &right, + const std::pair &new_pair, std::function merge_fn) { - for (auto &pair : right) { - auto rule = pair.first; - bool merged = false; - for (auto &existing_pair : *left) { - auto existing_rule = existing_pair.first; - if (existing_rule == rule) { - merge_fn(&existing_pair.second, &pair.second); - merged = true; - break; - } else if (rule < existing_rule) { - break; - } + auto new_symbol = new_pair.first; + for (auto &existing_pair : *left) { + auto existing_symbol = existing_pair.first; + if (new_symbol < existing_symbol) break; + if (existing_symbol == new_symbol) { + merge_fn(&existing_pair.second, &new_pair.second); + return; } - if (!merged) - left->insert({ pair.first, pair.second }); } + left->insert(new_pair); } /* @@ -44,37 +38,35 @@ namespace tree_sitter { */ template void merge_char_transitions(std::map *left, - const std::map &right, + const std::pair &new_pair, std::function merge_fn) { - for (auto &new_pair : right) { - rules::CharacterSet new_char_set = new_pair.first; - T new_value = new_pair.second; + rules::CharacterSet new_char_set = new_pair.first; + T new_value = new_pair.second; - std::map pairs_to_insert; + std::map pairs_to_insert; - auto iter = left->begin(); - while (iter != left->end()) { - rules::CharacterSet char_set = iter->first; - T value = iter->second; + auto iter = left->begin(); + while (iter != left->end()) { + rules::CharacterSet char_set = iter->first; + T value = iter->second; - rules::CharacterSet intersection = char_set.remove_set(new_char_set); - if (!intersection.is_empty()) { - new_char_set.remove_set(intersection); - if (!char_set.is_empty()) - pairs_to_insert.insert({ char_set, value }); - merge_fn(&value, &new_value); - pairs_to_insert.insert({ intersection, value }); - left->erase(iter++); - } else { - ++iter; - } + rules::CharacterSet intersection = char_set.remove_set(new_char_set); + if (!intersection.is_empty()) { + new_char_set.remove_set(intersection); + if (!char_set.is_empty()) + pairs_to_insert.insert({ char_set, value }); + merge_fn(&value, &new_value); + pairs_to_insert.insert({ intersection, value }); + left->erase(iter++); + } else { + ++iter; } - - left->insert(pairs_to_insert.begin(), pairs_to_insert.end()); - - if (!new_char_set.is_empty()) - left->insert({ new_char_set, new_pair.second }); } + + left->insert(pairs_to_insert.begin(), pairs_to_insert.end()); + + if (!new_char_set.is_empty()) + left->insert({ new_char_set, new_pair.second }); } } } diff --git a/src/compiler/build_tables/rule_transitions.cc b/src/compiler/build_tables/rule_transitions.cc index 33d532ca..2d318aad 100644 --- a/src/compiler/build_tables/rule_transitions.cc +++ b/src/compiler/build_tables/rule_transitions.cc @@ -25,16 +25,18 @@ namespace tree_sitter { template<> void merge_transitions(map *left, const map &right) { - merge_char_transitions(left, right, [](rule_ptr *left, const rule_ptr *right) { - *left = rules::Choice::Build({ *left, *right }); - }); + for (auto &pair : right) + merge_char_transitions(left, pair, [](rule_ptr *left, const rule_ptr *right) { + *left = rules::Choice::Build({ *left, *right }); + }); } template<> void merge_transitions(map *left, const map &right) { - merge_sym_transitions(left, right, [](rule_ptr *left, const rule_ptr *right) { - *left = rules::Choice::Build({ *left, *right }); - }); + for (auto &pair : right) + merge_sym_transitions(left, pair, [](rule_ptr *left, const rule_ptr *right) { + *left = rules::Choice::Build({ *left, *right }); + }); } template