Properly merge transitions on overlapping character sets!

This commit is contained in:
Max Brunsfeld 2014-02-10 13:20:43 -08:00
parent 905a408998
commit 8baa1396fd
15 changed files with 330 additions and 207 deletions

View file

@ -1,8 +1,8 @@
#include "item_set_transitions.h"
#include "item_set_closure.h"
#include "rule_transitions.h"
#include "merge_transitions.h"
using std::dynamic_pointer_cast;
using std::make_shared;
using std::shared_ptr;
@ -19,7 +19,7 @@ namespace tree_sitter {
transition_map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
transition_map<CharacterSet, LexItemSet> result;
for (LexItem item : item_set) {
for (const LexItem &item : item_set) {
transition_map<CharacterSet, LexItemSet> item_transitions;
for (auto transition : char_transitions(item.rule)) {
auto rule = transition.first;
@ -28,7 +28,7 @@ namespace tree_sitter {
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
}
result.merge(item_transitions, [](shared_ptr<const LexItemSet> left, shared_ptr<const LexItemSet> right) -> shared_ptr<const LexItemSet> {
result = merge_char_transitions<LexItemSet>(result, item_transitions, [](shared_ptr<LexItemSet> left, shared_ptr<LexItemSet> right) {
return make_shared<LexItemSet>(merge_sets(*left, *right));
});
}
@ -38,7 +38,7 @@ namespace tree_sitter {
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) {
transition_map<rules::Symbol, ParseItemSet> result;
for (ParseItem item : item_set) {
for (const ParseItem &item : item_set) {
transition_map<rules::Symbol, ParseItemSet> item_transitions;
for (auto transition : sym_transitions(item.rule)) {
auto rule = transition.first;
@ -49,7 +49,7 @@ namespace tree_sitter {
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}
result.merge(item_transitions, [](shared_ptr<const ParseItemSet> left, shared_ptr<const ParseItemSet> right) -> shared_ptr<const ParseItemSet> {
result = merge_sym_transitions<ParseItemSet>(result, item_transitions, [](shared_ptr<ParseItemSet> left, shared_ptr<ParseItemSet> right) {
return make_shared<ParseItemSet>(merge_sets(*left, *right));
});
}

View file

@ -0,0 +1,56 @@
#ifndef __tree_sitter__merge_transitions__
#define __tree_sitter__merge_transitions__
#include "transition_map.h"
#include "character_set.h"
#include "symbol.h"
namespace tree_sitter {
namespace build_tables {
/**
 * Combines two symbol-keyed transition maps into a new map.
 *
 * The result starts as a copy of `left`. Each entry of `right` is compared
 * (via the key's operator==) against the entries currently in the result:
 * on the first match, the matched entry's value is replaced with
 * `merge_fn(existing value, incoming value)`; when nothing matches, the
 * incoming entry is appended unchanged.
 *
 * @param left     map whose entries seed the result
 * @param right    map whose entries are folded into the result
 * @param merge_fn combines the two values of entries that share a key
 * @return the merged transition map
 */
template<typename T>
transition_map<rules::Symbol, T>
merge_sym_transitions(const transition_map<rules::Symbol, T> &left,
                      const transition_map<rules::Symbol, T> &right,
                      std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
    transition_map<rules::Symbol, T> result(left);
    for (auto &incoming : right) {
        auto incoming_key = incoming.first;

        // Find the value slot of the first entry whose key equals the incoming key.
        std::shared_ptr<T> *slot = nullptr;
        for (auto &entry : result) {
            auto entry_key = entry.first;
            if (entry_key->operator==(*incoming_key)) {
                slot = &entry.second;
                break;
            }
        }

        if (slot) {
            *slot = merge_fn(*slot, incoming.second);
        } else {
            result.add(incoming.first, incoming.second);
        }
    }
    return result;
}
/**
 * Combines two character-set-keyed transition maps, splitting keys that overlap.
 *
 * The result starts as a copy of `left`. For every entry of `right`, each
 * entry of `left` is checked for shared characters: the shared characters are
 * removed from both keys and added to the result as a new entry whose value
 * is `merge_fn(left value, right value)`. Whatever remains of the right-hand
 * key afterwards is added with its original value.
 *
 * Entries whose key ends up empty are not part of the returned map: an empty
 * character set can never match, so such a transition is dead.
 *
 * NOTE: the keys are modified in place through their shared pointers, so the
 * CharacterSet objects referenced by `left` and `right` are changed by this
 * call even though the maps themselves are passed as const references.
 * NOTE(review): this relies on transition_map's copy sharing key pointers with
 * `left`, so that shrinking a left key also shrinks it in the result — confirm.
 *
 * @param left     map whose entries seed the result
 * @param right    map whose entries are folded into the result
 * @param merge_fn combines the values of two entries whose keys overlap
 * @return the merged transition map, without empty-key entries
 */
template<typename T>
transition_map<rules::CharacterSet, T>
merge_char_transitions(const transition_map<rules::CharacterSet, T> &left,
                       const transition_map<rules::CharacterSet, T> &right,
                       std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
    transition_map<rules::CharacterSet, T> result(left);
    for (auto &pair : right) {
        auto rule = pair.first;
        for (auto &existing_pair : left) {
            auto existing_rule = existing_pair.first;
            // remove_set strips the characters of *rule from the existing key;
            // its return value is used here as the overlap between the two keys.
            auto intersection = existing_rule->remove_set(*rule);
            if (!intersection.is_empty()) {
                rule->remove_set(intersection);
                result.add(std::make_shared<rules::CharacterSet>(intersection), merge_fn(existing_pair.second, pair.second));
            }
        }
        // Only keep the remainder if some characters were not claimed by an overlap.
        if (!rule->is_empty())
            result.add(rule, pair.second);
    }

    // Left-hand keys that were completely covered by right-hand keys are now
    // empty; drop them so the result holds only transitions that can match.
    transition_map<rules::CharacterSet, T> compacted;
    for (auto &entry : result) {
        if (!entry.first->is_empty())
            compacted.add(entry.first, entry.second);
    }
    return compacted;
}
}
}
#endif

View file

@ -1,5 +1,6 @@
#include "rule_transitions.h"
#include "rules.h"
#include "merge_transitions.h"
using namespace tree_sitter::rules;
@ -9,6 +10,23 @@ namespace tree_sitter {
return typeid(*rule) == typeid(Blank);
}
// Merges two rule-valued transition maps keyed by T. Only declared here;
// the definitions are the explicit specializations for CharacterSet and
// Symbol keys that follow.
template<typename T>
transition_map<T, Rule> merge_transitions(const transition_map<T, Rule> &left, const transition_map<T, Rule> &right);
// Character-set keys: delegates to merge_char_transitions, combining the
// rules of merged entries into a choice between the two.
template<>
transition_map<CharacterSet, Rule> merge_transitions(const transition_map<CharacterSet, Rule> &left, const transition_map<CharacterSet, Rule> &right) {
    auto either = [](rule_ptr first, rule_ptr second) -> rule_ptr {
        return choice({ first, second });
    };
    return merge_char_transitions<Rule>(left, right, either);
}
// Symbol keys: delegates to merge_sym_transitions, combining the rules of
// merged entries into a choice between the two.
template<>
transition_map<Symbol, Rule> merge_transitions(const transition_map<Symbol, Rule> &left, const transition_map<Symbol, Rule> &right) {
    auto either = [](rule_ptr first, rule_ptr second) -> rule_ptr {
        return choice({ first, second });
    };
    return merge_sym_transitions<Rule>(left, right, either);
}
template<typename T>
class TransitionsVisitor : public rules::Visitor {
public:
@ -23,7 +41,7 @@ namespace tree_sitter {
void visit_atom(const Rule *rule) {
auto atom = dynamic_cast<const T *>(rule);
if (atom) {
value = transition_map<T, Rule>({{ std::make_shared<const T>(*atom), blank() }});
value = transition_map<T, Rule>({{ std::make_shared<T>(*atom), blank() }});
}
}
@ -37,9 +55,7 @@ namespace tree_sitter {
void visit(const Choice *rule) {
value = transitions(rule->left);
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
value = merge_transitions<T>(transitions(rule->left), transitions(rule->right));
}
void visit(const Seq *rule) {
@ -50,9 +66,7 @@ namespace tree_sitter {
return seq({ left_rule, rule->right });
});
if (rule_can_be_blank(rule->left)) {
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
value = merge_transitions<T>(value, transitions(rule->right));
}
}

View file

@ -8,13 +8,14 @@
namespace tree_sitter {
template<typename TKey, typename TValue>
class transition_map {
typedef std::shared_ptr<const TKey> TKeyPtr;
typedef std::shared_ptr<const TValue> TValuePtr;
typedef std::pair<const TKeyPtr, TValuePtr> pair_type;
typedef std::shared_ptr<TKey> TKeyPtr;
typedef std::shared_ptr<TValue> TValuePtr;
typedef std::pair<TKeyPtr, TValuePtr> pair_type;
typedef std::vector<pair_type> contents_type;
contents_type contents;
public:
transition_map() : contents(contents_type()) {};
transition_map(std::vector<pair_type> pairs) : contents(pairs) {};
@ -33,15 +34,6 @@ namespace tree_sitter {
contents.push_back(pair_type(key, value));
}
void merge(const transition_map<TKey, TValue> &other, std::function<TValuePtr(TValuePtr, TValuePtr)> merge_fn) {
for (pair_type other_pair : other) {
if (pair_type *current_pair = pair_for_key(*other_pair.first))
current_pair->second = merge_fn(current_pair->second, other_pair.second);
else
add(other_pair.first, other_pair.second);
}
}
TValuePtr operator[](const TKey &key) const {
for (auto pair : *this) {
if (*pair.first == key) {
@ -52,7 +44,7 @@ namespace tree_sitter {
}
template<typename NewV>
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<const NewV>(TValuePtr)> map_fn) {
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<NewV>(TValuePtr)> map_fn) {
transition_map<TKey, NewV> result;
for (pair_type pair : *this) {
auto new_value = map_fn(pair.second);
@ -70,18 +62,6 @@ namespace tree_sitter {
const_iterator begin() const { return contents.begin(); }
const_iterator end() const { return contents.end(); }
size_t size() const { return contents.size(); }
private:
pair_type * pair_for_key(const TKey &key) {
for (int i = 0; i < contents.size(); i++) {
pair_type *pair = &contents[i];
if (*pair->first == key) return pair;
}
return NULL;
}
contents_type contents;
};
template<typename K, typename V>

View file

@ -98,6 +98,8 @@ namespace tree_sitter {
return "\\0";
case '"':
return "\\\"";
case '\\':
return "\\\\";
default:
return string() + character;
}
@ -108,16 +110,20 @@ namespace tree_sitter {
if (range.min == range.max) {
return lookahead + " == '" + character_code(range.min) + "'";
} else {
return string("'") + range.min + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + range.max + "'";
return string("'") + character_code(range.min) + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + character_code(range.max) + "'";
}
}
string condition_for_character_set(const rules::CharacterSet &set) {
vector<string> parts;
for (auto &match : set.ranges)
parts.push_back("(" + condition_for_character_range(match) + ")");
return join(parts, " ||\n ");
if (set.ranges.size() == 1) {
return condition_for_character_range(*set.ranges.begin());
} else {
for (auto &match : set.ranges)
parts.push_back("(" + condition_for_character_range(match) + ")");
return join(parts, " ||\n ");
}
}
string condition_for_character_rule(const rules::CharacterSet &rule) {

View file

@ -154,6 +154,10 @@ namespace tree_sitter {
return removed_set;
}
// Reports whether this set holds no character ranges at all.
bool CharacterSet::is_empty() const {
    return ranges.size() == 0;
}
void CharacterSet::add_set(const CharacterSet &other) {
for (auto &other_range : other.ranges) {
add_range(this, other_range);

View file

@ -38,6 +38,7 @@ namespace tree_sitter {
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
@ -51,7 +52,7 @@ namespace tree_sitter {
std::set<CharacterRange> ranges;
};
typedef std::shared_ptr<const CharacterSet> char_ptr;
typedef std::shared_ptr<CharacterSet> char_ptr;
}
}

View file

@ -8,7 +8,7 @@ namespace tree_sitter {
class Visitor;
class Rule;
typedef std::shared_ptr<const Rule> rule_ptr;
typedef std::shared_ptr<Rule> rule_ptr;
class Rule {
public:

View file

@ -23,7 +23,7 @@ namespace tree_sitter {
bool is_auxiliary;
};
typedef std::shared_ptr<const Symbol> sym_ptr;
typedef std::shared_ptr<Symbol> sym_ptr;
}
}