Properly merge transitions on overlapping character sets!
This commit is contained in:
parent
905a408998
commit
8baa1396fd
15 changed files with 330 additions and 207 deletions
|
|
@ -1,8 +1,8 @@
|
|||
#include "item_set_transitions.h"
|
||||
#include "item_set_closure.h"
|
||||
#include "rule_transitions.h"
|
||||
#include "merge_transitions.h"
|
||||
|
||||
using std::dynamic_pointer_cast;
|
||||
using std::make_shared;
|
||||
using std::shared_ptr;
|
||||
|
||||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
|
||||
transition_map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<CharacterSet, LexItemSet> result;
|
||||
for (LexItem item : item_set) {
|
||||
for (const LexItem &item : item_set) {
|
||||
transition_map<CharacterSet, LexItemSet> item_transitions;
|
||||
for (auto transition : char_transitions(item.rule)) {
|
||||
auto rule = transition.first;
|
||||
|
|
@ -28,7 +28,7 @@ namespace tree_sitter {
|
|||
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
|
||||
}
|
||||
|
||||
result.merge(item_transitions, [](shared_ptr<const LexItemSet> left, shared_ptr<const LexItemSet> right) -> shared_ptr<const LexItemSet> {
|
||||
result = merge_char_transitions<LexItemSet>(result, item_transitions, [](shared_ptr<LexItemSet> left, shared_ptr<LexItemSet> right) {
|
||||
return make_shared<LexItemSet>(merge_sets(*left, *right));
|
||||
});
|
||||
}
|
||||
|
|
@ -38,7 +38,7 @@ namespace tree_sitter {
|
|||
|
||||
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<rules::Symbol, ParseItemSet> result;
|
||||
for (ParseItem item : item_set) {
|
||||
for (const ParseItem &item : item_set) {
|
||||
transition_map<rules::Symbol, ParseItemSet> item_transitions;
|
||||
for (auto transition : sym_transitions(item.rule)) {
|
||||
auto rule = transition.first;
|
||||
|
|
@ -49,7 +49,7 @@ namespace tree_sitter {
|
|||
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
|
||||
}
|
||||
|
||||
result.merge(item_transitions, [](shared_ptr<const ParseItemSet> left, shared_ptr<const ParseItemSet> right) -> shared_ptr<const ParseItemSet> {
|
||||
result = merge_sym_transitions<ParseItemSet>(result, item_transitions, [](shared_ptr<ParseItemSet> left, shared_ptr<ParseItemSet> right) {
|
||||
return make_shared<ParseItemSet>(merge_sets(*left, *right));
|
||||
});
|
||||
}
|
||||
|
|
|
|||
56
src/compiler/build_tables/merge_transitions.h
Normal file
56
src/compiler/build_tables/merge_transitions.h
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
#ifndef __tree_sitter__merge_transitions__
|
||||
#define __tree_sitter__merge_transitions__
|
||||
|
||||
#include "transition_map.h"
|
||||
#include "character_set.h"
|
||||
#include "symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
template<typename T>
|
||||
transition_map<rules::Symbol, T>
|
||||
merge_sym_transitions(const transition_map<rules::Symbol, T> &left,
|
||||
const transition_map<rules::Symbol, T> &right,
|
||||
std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
|
||||
transition_map<rules::Symbol, T> result(left);
|
||||
for (auto &pair : right) {
|
||||
auto rule = pair.first;
|
||||
bool merged = false;
|
||||
for (auto &existing_pair : result) {
|
||||
auto existing_rule = existing_pair.first;
|
||||
if (existing_rule->operator==(*rule)) {
|
||||
existing_pair.second = merge_fn(existing_pair.second, pair.second);
|
||||
merged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!merged)
|
||||
result.add(pair.first, pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
transition_map<rules::CharacterSet, T>
|
||||
merge_char_transitions(const transition_map<rules::CharacterSet, T> &left,
|
||||
const transition_map<rules::CharacterSet, T> &right,
|
||||
std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
|
||||
transition_map<rules::CharacterSet, T> result(left);
|
||||
for (auto &pair : right) {
|
||||
auto rule = pair.first;
|
||||
for (auto &existing_pair : left) {
|
||||
auto existing_rule = existing_pair.first;
|
||||
auto intersection = existing_rule->remove_set(*rule);
|
||||
if (!intersection.is_empty()) {
|
||||
rule->remove_set(intersection);
|
||||
result.add(std::make_shared<rules::CharacterSet>(intersection), merge_fn(existing_pair.second, pair.second));
|
||||
}
|
||||
}
|
||||
result.add(rule, pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
#include "rule_transitions.h"
|
||||
#include "rules.h"
|
||||
#include "merge_transitions.h"
|
||||
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
|
|
@ -9,6 +10,23 @@ namespace tree_sitter {
|
|||
return typeid(*rule) == typeid(Blank);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
transition_map<T, Rule> merge_transitions(const transition_map<T, Rule> &left, const transition_map<T, Rule> &right);
|
||||
|
||||
template<>
|
||||
transition_map<CharacterSet, Rule> merge_transitions(const transition_map<CharacterSet, Rule> &left, const transition_map<CharacterSet, Rule> &right) {
|
||||
return merge_char_transitions<Rule>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
transition_map<Symbol, Rule> merge_transitions(const transition_map<Symbol, Rule> &left, const transition_map<Symbol, Rule> &right) {
|
||||
return merge_sym_transitions<Rule>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
class TransitionsVisitor : public rules::Visitor {
|
||||
public:
|
||||
|
|
@ -23,7 +41,7 @@ namespace tree_sitter {
|
|||
void visit_atom(const Rule *rule) {
|
||||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom) {
|
||||
value = transition_map<T, Rule>({{ std::make_shared<const T>(*atom), blank() }});
|
||||
value = transition_map<T, Rule>({{ std::make_shared<T>(*atom), blank() }});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -37,9 +55,7 @@ namespace tree_sitter {
|
|||
|
||||
void visit(const Choice *rule) {
|
||||
value = transitions(rule->left);
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
value = merge_transitions<T>(transitions(rule->left), transitions(rule->right));
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
|
|
@ -50,9 +66,7 @@ namespace tree_sitter {
|
|||
return seq({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
value = merge_transitions<T>(value, transitions(rule->right));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,13 +8,14 @@
|
|||
namespace tree_sitter {
|
||||
template<typename TKey, typename TValue>
|
||||
class transition_map {
|
||||
typedef std::shared_ptr<const TKey> TKeyPtr;
|
||||
typedef std::shared_ptr<const TValue> TValuePtr;
|
||||
typedef std::pair<const TKeyPtr, TValuePtr> pair_type;
|
||||
typedef std::shared_ptr<TKey> TKeyPtr;
|
||||
typedef std::shared_ptr<TValue> TValuePtr;
|
||||
typedef std::pair<TKeyPtr, TValuePtr> pair_type;
|
||||
typedef std::vector<pair_type> contents_type;
|
||||
|
||||
contents_type contents;
|
||||
|
||||
public:
|
||||
|
||||
transition_map() : contents(contents_type()) {};
|
||||
transition_map(std::vector<pair_type> pairs) : contents(pairs) {};
|
||||
|
||||
|
|
@ -33,15 +34,6 @@ namespace tree_sitter {
|
|||
contents.push_back(pair_type(key, value));
|
||||
}
|
||||
|
||||
void merge(const transition_map<TKey, TValue> &other, std::function<TValuePtr(TValuePtr, TValuePtr)> merge_fn) {
|
||||
for (pair_type other_pair : other) {
|
||||
if (pair_type *current_pair = pair_for_key(*other_pair.first))
|
||||
current_pair->second = merge_fn(current_pair->second, other_pair.second);
|
||||
else
|
||||
add(other_pair.first, other_pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
TValuePtr operator[](const TKey &key) const {
|
||||
for (auto pair : *this) {
|
||||
if (*pair.first == key) {
|
||||
|
|
@ -52,7 +44,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
template<typename NewV>
|
||||
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<const NewV>(TValuePtr)> map_fn) {
|
||||
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<NewV>(TValuePtr)> map_fn) {
|
||||
transition_map<TKey, NewV> result;
|
||||
for (pair_type pair : *this) {
|
||||
auto new_value = map_fn(pair.second);
|
||||
|
|
@ -70,18 +62,6 @@ namespace tree_sitter {
|
|||
const_iterator begin() const { return contents.begin(); }
|
||||
const_iterator end() const { return contents.end(); }
|
||||
size_t size() const { return contents.size(); }
|
||||
|
||||
private:
|
||||
|
||||
pair_type * pair_for_key(const TKey &key) {
|
||||
for (int i = 0; i < contents.size(); i++) {
|
||||
pair_type *pair = &contents[i];
|
||||
if (*pair->first == key) return pair;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
contents_type contents;
|
||||
};
|
||||
|
||||
template<typename K, typename V>
|
||||
|
|
|
|||
|
|
@ -98,6 +98,8 @@ namespace tree_sitter {
|
|||
return "\\0";
|
||||
case '"':
|
||||
return "\\\"";
|
||||
case '\\':
|
||||
return "\\\\";
|
||||
default:
|
||||
return string() + character;
|
||||
}
|
||||
|
|
@ -108,16 +110,20 @@ namespace tree_sitter {
|
|||
if (range.min == range.max) {
|
||||
return lookahead + " == '" + character_code(range.min) + "'";
|
||||
} else {
|
||||
return string("'") + range.min + string("' <= ") + lookahead +
|
||||
" && " + lookahead + " <= '" + range.max + "'";
|
||||
return string("'") + character_code(range.min) + string("' <= ") + lookahead +
|
||||
" && " + lookahead + " <= '" + character_code(range.max) + "'";
|
||||
}
|
||||
}
|
||||
|
||||
string condition_for_character_set(const rules::CharacterSet &set) {
|
||||
vector<string> parts;
|
||||
for (auto &match : set.ranges)
|
||||
parts.push_back("(" + condition_for_character_range(match) + ")");
|
||||
return join(parts, " ||\n ");
|
||||
if (set.ranges.size() == 1) {
|
||||
return condition_for_character_range(*set.ranges.begin());
|
||||
} else {
|
||||
for (auto &match : set.ranges)
|
||||
parts.push_back("(" + condition_for_character_range(match) + ")");
|
||||
return join(parts, " ||\n ");
|
||||
}
|
||||
}
|
||||
|
||||
string condition_for_character_rule(const rules::CharacterSet &rule) {
|
||||
|
|
|
|||
|
|
@ -154,6 +154,10 @@ namespace tree_sitter {
|
|||
return removed_set;
|
||||
}
|
||||
|
||||
bool CharacterSet::is_empty() const {
|
||||
return ranges.empty();
|
||||
}
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
for (auto &other_range : other.ranges) {
|
||||
add_range(this, other_range);
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ namespace tree_sitter {
|
|||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
bool is_empty() const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
|
|
@ -51,7 +52,7 @@ namespace tree_sitter {
|
|||
std::set<CharacterRange> ranges;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const CharacterSet> char_ptr;
|
||||
typedef std::shared_ptr<CharacterSet> char_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ namespace tree_sitter {
|
|||
class Visitor;
|
||||
class Rule;
|
||||
|
||||
typedef std::shared_ptr<const Rule> rule_ptr;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ namespace tree_sitter {
|
|||
bool is_auxiliary;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Symbol> sym_ptr;
|
||||
typedef std::shared_ptr<Symbol> sym_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue