Compute item set transitions
This commit is contained in:
parent
100ab56779
commit
99c216f78a
23 changed files with 250 additions and 55 deletions
|
|
@ -5,7 +5,7 @@ namespace tree_sitter {
|
|||
rules(rules),
|
||||
start_rule_name(rules.begin()->first) {}
|
||||
|
||||
const rules::rule_ptr Grammar::rule(const std::string &name) {
|
||||
const rules::rule_ptr Grammar::rule(const std::string &name) const {
|
||||
auto iter = rules.find(name);
|
||||
return (iter == rules.end()) ?
|
||||
rules::rule_ptr(nullptr) :
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ namespace tree_sitter {
|
|||
|
||||
public:
|
||||
Grammar(const rule_map_init_list &rules);
|
||||
const rules::rule_ptr rule(const std::string &);
|
||||
const rules::rule_ptr rule(const std::string &) const;
|
||||
const std::string start_rule_name;
|
||||
|
||||
private:
|
||||
|
|
|
|||
|
|
@ -10,13 +10,13 @@ namespace tree_sitter {
|
|||
rule(rule),
|
||||
consumed_sym_count(consumed_sym_count) {};
|
||||
|
||||
Item Item::at_beginning_of_rule(const std::string &rule_name, Grammar &grammar) {
|
||||
Item Item::at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar) {
|
||||
return Item(rule_name, grammar.rule(rule_name), 0);
|
||||
}
|
||||
|
||||
TransitionMap<Item> Item::transitions() const {
|
||||
return rule->transitions().map<Item>([&](rules::rule_ptr to_rule) {
|
||||
return item_ptr(new Item(rule_name, to_rule, consumed_sym_count + 1));
|
||||
return std::make_shared<Item>(rule_name, to_rule, consumed_sym_count + 1);
|
||||
});
|
||||
};
|
||||
|
||||
|
|
@ -24,16 +24,15 @@ namespace tree_sitter {
|
|||
vector<rules::sym_ptr> result;
|
||||
for (auto pair : rule->transitions()) {
|
||||
shared_ptr<const rules::Symbol> sym = dynamic_pointer_cast<const rules::Symbol>(pair.first);
|
||||
if (sym != nullptr) result.push_back(sym);
|
||||
if (sym) result.push_back(sym);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Item::operator==(const Item &other) const {
|
||||
return (
|
||||
other.rule_name == rule_name &&
|
||||
other.rule == rule &&
|
||||
other.consumed_sym_count == consumed_sym_count);
|
||||
bool rule_names_eq = other.rule_name == rule_name;
|
||||
bool rules_eq = (*other.rule == *rule);
|
||||
return rule_names_eq && rules_eq;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(ostream &stream, const Item &item) {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ namespace tree_sitter {
|
|||
class Item {
|
||||
public:
|
||||
Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count);
|
||||
static Item at_beginning_of_rule(const std::string &rule_name, Grammar &grammar);
|
||||
static Item at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar);
|
||||
|
||||
TransitionMap<Item> transitions() const;
|
||||
std::vector<rules::sym_ptr> next_symbols() const;
|
||||
|
|
|
|||
|
|
@ -8,16 +8,12 @@ namespace tree_sitter {
|
|||
namespace lr {
|
||||
ItemSet::ItemSet(const vector<Item> &items) : contents(items) {}
|
||||
ItemSet::ItemSet(const initializer_list<Item> &items) : contents(items) {}
|
||||
|
||||
TransitionMap<ItemSet> ItemSet::transitions() const {
|
||||
return TransitionMap<ItemSet>();
|
||||
}
|
||||
|
||||
bool vector_contains(vector<Item> items, lr::Item item) {
|
||||
|
||||
static bool vector_contains(vector<Item> items, lr::Item item) {
|
||||
return (std::find(items.begin(), items.end(), item) != items.end());
|
||||
}
|
||||
|
||||
void add_item(vector<Item> &vector, const Item &item, Grammar &grammar) {
|
||||
static void add_item(vector<Item> &vector, const Item &item, const Grammar &grammar) {
|
||||
if (!vector_contains(vector, item)) {
|
||||
vector.push_back(item);
|
||||
for (rules::sym_ptr rule : item.next_symbols()) {
|
||||
|
|
@ -26,12 +22,51 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the list of items reachable from a single seed item.
//
// Starts from an empty vector and hands it to add_item together with the seed;
// whatever add_item accumulates in the vector is returned to the caller.
// NOTE(review): add_item is only partially visible here (it appends the item
// when absent and then walks item.next_symbols()) -- confirm the full closure
// rule against its definition.
static vector<Item> closure_in_grammar(const Item &item, const Grammar &grammar) {
    vector<Item> closure;
    add_item(closure, item, grammar);
    return closure;
}
|
||||
|
||||
// Builds an item set from one seed item: `contents` is initialised with the
// vector returned by the file-local closure_in_grammar(item, grammar) helper.
ItemSet::ItemSet(const Item &item, const Grammar &grammar) : contents(closure_in_grammar(item, grammar)) {}
|
||||
|
||||
// Collects the transitions of every item in this set whose triggering rule is
// NOT exactly a rules::Symbol (checked via typeid on the pointed-to rule).
//
// For each such transition the target item is expanded into a new ItemSet
// built from that item and `grammar`. The per-item maps are folded into one
// result with TransitionMap::merge; the merge callback always returns its
// first argument.
// NOTE(review): presumably merge invokes the callback when both maps hold the
// same rule, so the entry already in `result` wins -- confirm against
// TransitionMap::merge.
TransitionMap<ItemSet> ItemSet::char_transitions(const Grammar &grammar) const {
    TransitionMap<ItemSet> result;
    // Iterate by const reference: copying an Item per iteration is unnecessary.
    for (const auto &item : *this) {
        auto new_set = item.transitions()
            .where([](const rules::rule_ptr &on_rule) -> bool {
                return typeid(*on_rule) != typeid(rules::Symbol);
            })
            .map<ItemSet>([&grammar](const item_ptr &next_item) -> item_set_ptr {
                return std::make_shared<ItemSet>(*next_item, grammar);
            });
        result.merge(new_set, [](const item_set_ptr &left, const item_set_ptr &) -> item_set_ptr {
            return left;
        });
    }
    return result;
}
|
||||
|
||||
ItemSet ItemSet::closure_in_grammar(Grammar &grammar) const {
|
||||
vector<Item> items;
|
||||
for (Item item : *this)
|
||||
add_item(items, item, grammar);
|
||||
return ItemSet(items);
|
||||
// Collects the transitions of every item in this set whose triggering rule IS
// exactly a rules::Symbol (checked via typeid on the pointed-to rule).
//
// For each such transition the target item is expanded into a new ItemSet
// built from that item and `grammar`. The per-item maps are folded into one
// result with TransitionMap::merge; the merge callback always returns its
// first argument.
// NOTE(review): presumably merge invokes the callback when both maps hold the
// same rule, so the entry already in `result` wins -- confirm against
// TransitionMap::merge.
TransitionMap<ItemSet> ItemSet::sym_transitions(const Grammar &grammar) const {
    TransitionMap<ItemSet> result;
    // Iterate by const reference: copying an Item per iteration is unnecessary.
    for (const auto &item : *this) {
        auto new_set = item.transitions()
            .where([](const rules::rule_ptr &on_rule) -> bool {
                return typeid(*on_rule) == typeid(rules::Symbol);
            })
            .map<ItemSet>([&grammar](const item_ptr &next_item) -> item_set_ptr {
                return std::make_shared<ItemSet>(*next_item, grammar);
            });
        result.merge(new_set, [](const item_set_ptr &left, const item_set_ptr &) -> item_set_ptr {
            return left;
        });
    }
    return result;
}
|
||||
|
||||
bool ItemSet::operator==(const tree_sitter::lr::ItemSet &other) const {
|
||||
return contents == other.contents;
|
||||
}
|
||||
|
||||
#pragma mark - container
|
||||
|
|
|
|||
|
|
@ -6,10 +6,14 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace lr {
|
||||
class ItemSet;
|
||||
typedef std::shared_ptr<const ItemSet> item_set_ptr;
|
||||
|
||||
class ItemSet {
|
||||
public:
|
||||
ItemSet(const std::vector<Item> &items);
|
||||
ItemSet(const std::initializer_list<Item> &items);
|
||||
ItemSet(const Item &item, const Grammar &grammar);
|
||||
|
||||
typedef Item value_type;
|
||||
typedef std::vector<Item>::const_iterator const_iterator;
|
||||
|
|
@ -17,12 +21,14 @@ namespace tree_sitter {
|
|||
const_iterator end() const;
|
||||
size_t size() const;
|
||||
|
||||
ItemSet closure_in_grammar(Grammar &grammar) const;
|
||||
TransitionMap<ItemSet> transitions() const;
|
||||
TransitionMap<ItemSet> sym_transitions(const Grammar &grammar) const;
|
||||
TransitionMap<ItemSet> char_transitions(const Grammar &grammar) const;
|
||||
|
||||
bool operator==(const ItemSet &other) const;
|
||||
const std::vector<Item> contents;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const ItemSet> item_set_ptr;
|
||||
std::ostream& operator<<(std::ostream &stream, const ItemSet &item_set);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "string.h"
|
||||
#include "pattern.h"
|
||||
#include "char.h"
|
||||
#include "char_class.h"
|
||||
#include "repeat.h"
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
bool Blank::operator==(const Rule &rule) const {
|
||||
return dynamic_cast<const Blank *>(&rule) != NULL;
|
||||
return dynamic_cast<const Blank *>(&rule) != nullptr;
|
||||
}
|
||||
|
||||
std::string Blank::to_string() const {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
Char::Char(char value) : value(value) {};
|
||||
|
||||
char_ptr character(char value) {
|
||||
rule_ptr character(char value) {
|
||||
return std::make_shared<Char>(value);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ namespace tree_sitter {
|
|||
const char value;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Char> char_ptr;
|
||||
char_ptr character(char value);
|
||||
rule_ptr character(char value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
33
src/rules/char_class.cpp
Normal file
33
src/rules/char_class.cpp
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
#include "char_class.h"
|
||||
#include "blank.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
CharClass::CharClass(CharClassType value) : value(value) {};
|
||||
|
||||
rule_ptr char_class(CharClassType type) {
|
||||
return std::make_shared<CharClass>(type);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> CharClass::transitions() const {
|
||||
return TransitionMap<Rule>({{ char_class(value), blank() }});
|
||||
}
|
||||
|
||||
bool CharClass::operator==(const Rule &rule) const {
|
||||
const CharClass *other = dynamic_cast<const CharClass *>(&rule);
|
||||
return other && (other->value == value);
|
||||
}
|
||||
|
||||
string CharClass::to_string() const {
|
||||
switch (value) {
|
||||
case CharClassTypeDigit:
|
||||
return "<digit>";
|
||||
case CharClassTypeWord:
|
||||
return "<word>";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
27
src/rules/char_class.h
Normal file
27
src/rules/char_class.h
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
// Include guard renamed: identifiers containing a double underscore are
// reserved for the implementation.
#ifndef TREE_SITTER_RULES_CHAR_CLASS_H_
#define TREE_SITTER_RULES_CHAR_CLASS_H_

#include <string>

#include "rule.h"

namespace tree_sitter {
    namespace rules {
        // Kinds of character class a CharClass rule can represent.
        // Kept as an unscoped enum so the enumerators stay usable unqualified.
        enum CharClassType {
            CharClassTypeWord,
            CharClassTypeDigit
        };

        // Rule that stands for a whole class of characters rather than a
        // single literal character.
        class CharClass : public Rule {
        public:
            CharClass(CharClassType type);

            // Transitions available from this rule.
            TransitionMap<Rule> transitions() const;

            // True when `other` is a CharClass of the same kind.
            bool operator==(const Rule& other) const;

            // Short textual tag for the class kind.
            std::string to_string() const;
        private:
            // The kind fixed at construction.
            const CharClassType value;
        };

        // Factory returning a CharClass of the given kind as a rule_ptr.
        rule_ptr char_class(CharClassType value);
    }
}

#endif
|
||||
|
|
@ -6,7 +6,10 @@ namespace tree_sitter {
|
|||
Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {};
|
||||
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules) {
|
||||
return build_binary_rule_tree<Choice>(rules);
|
||||
rule_ptr result;
|
||||
for (auto rule : rules)
|
||||
result = result.get() ? std::make_shared<Choice>(result, rule) : rule;
|
||||
return result;
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Choice::transitions() const {
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ namespace tree_sitter {
|
|||
break;
|
||||
case '\\':
|
||||
next();
|
||||
result = character(peek());
|
||||
result = escaped_char(peek());
|
||||
next();
|
||||
break;
|
||||
default:
|
||||
|
|
@ -67,6 +67,21 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Translates the character that follows a backslash into a rule.
//
//   '(' or ')'  -> a literal character rule for that character
//   'w'         -> the word character class
//   'd'         -> the digit character class
//
// Any other character is reported through error(...) and an empty rule_ptr
// is returned.
rule_ptr escaped_char(char value) {
    if (value == '(' || value == ')')
        return character(value);
    if (value == 'w')
        return char_class(CharClassTypeWord);
    if (value == 'd')
        return char_class(CharClassTypeDigit);

    error("unrecognized escape sequence");
    return rule_ptr();
}
|
||||
|
||||
void next() {
|
||||
position++;
|
||||
}
|
||||
|
|
@ -103,7 +118,8 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
bool Pattern::operator ==(tree_sitter::rules::Rule const &other) const {
|
||||
return false;
|
||||
auto pattern = dynamic_cast<const Pattern *>(&other);
|
||||
return pattern && (pattern->value == value);
|
||||
}
|
||||
|
||||
std::string Pattern::to_string() const {
|
||||
|
|
|
|||
|
|
@ -21,14 +21,6 @@ namespace tree_sitter {
|
|||
|
||||
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
|
||||
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
|
||||
|
||||
template <typename RuleClass>
|
||||
rule_ptr build_binary_rule_tree(const std::initializer_list<rule_ptr> &rules) {
|
||||
rule_ptr result;
|
||||
for (auto rule : rules)
|
||||
result = result.get() ? std::make_shared<RuleClass>(result, rule) : rule;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,12 @@ namespace tree_sitter {
|
|||
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {};
|
||||
|
||||
rule_ptr seq(const std::initializer_list<rule_ptr> &rules) {
|
||||
return build_binary_rule_tree<Seq>(rules);
|
||||
rule_ptr result;
|
||||
for (auto rule : rules)
|
||||
result = (result.get() && typeid(*result) != typeid(Blank)) ?
|
||||
std::make_shared<Seq>(result, rule) :
|
||||
rule;
|
||||
return result;
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Seq::transitions() const {
|
||||
|
|
|
|||
|
|
@ -60,6 +60,15 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns a new map holding only the entries of this map whose rule
// (the pair's `first`) is accepted by `filter_fn`; accepted entries are
// re-added to the result with their original `second`.
TransitionMap<MappedType> where(std::function<bool(rule_ptr)> filter_fn) {
    TransitionMap<MappedType> result;
    // Bind by const reference: each entry is only read, so copying the whole
    // pair on every iteration is unnecessary.
    for (const pair_type &pair : *this) {
        if (filter_fn(pair.first))
            result.add(pair.first, pair.second);
    }
    return result;
}
|
||||
|
||||
template<typename NewMappedType>
|
||||
TransitionMap<NewMappedType> map(std::function<std::shared_ptr<const NewMappedType>(mapped_ptr)> map_fn) {
|
||||
TransitionMap<NewMappedType> result;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue