Compute item set transitions

This commit is contained in:
Max Brunsfeld 2013-11-20 19:00:20 -08:00
parent 100ab56779
commit 99c216f78a
23 changed files with 250 additions and 55 deletions

View file

@ -25,6 +25,7 @@
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; };
12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; };
12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; };
12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F8BE8C183C79B2006CCF99 /* char_class.cpp */; };
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; };
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; };
27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; };
@ -148,6 +149,8 @@
12D136A3183678A2005F3369 /* repeat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = "<group>"; };
12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; };
12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = "<group>"; };
12F8BE8C183C79B2006CCF99 /* char_class.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char_class.cpp; sourceTree = "<group>"; };
12F8BE8D183C79B2006CCF99 /* char_class.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char_class.h; sourceTree = "<group>"; };
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; };
12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/spec_helper.h; sourceTree = SOURCE_ROOT; };
12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = grammar.cpp; sourceTree = "<group>"; };
@ -188,6 +191,8 @@
12130616182C3D2900FCF928 /* string.h */,
12130609182C389100FCF928 /* symbol.cpp */,
1213060A182C389100FCF928 /* symbol.h */,
12F8BE8C183C79B2006CCF99 /* char_class.cpp */,
12F8BE8D183C79B2006CCF99 /* char_class.h */,
);
path = rules;
sourceTree = "<group>";
@ -537,6 +542,7 @@
1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */,
1213061B182C84DF00FCF928 /* item.cpp in Sources */,
12130617182C3D2900FCF928 /* string.cpp in Sources */,
12F8BE8E183C79B2006CCF99 /* char_class.cpp in Sources */,
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */,
1213060E182C398300FCF928 /* choice.cpp in Sources */,

View file

@ -1,23 +1,56 @@
#include "spec_helper.h"
#include "../test_grammars/arithmetic.h"
#include <memory>
using namespace tree_sitter::lr;
using namespace tree_sitter::rules;
Describe(item_sets) {
// Spec helper: wraps a brace-enclosed list of items in a shared ItemSet so it
// can be used as a mapped value when spelling out an expected
// TransitionMap<ItemSet>.
static item_set_ptr item_set(const std::initializer_list<Item> &items) {
    // make_shared instead of a raw `new`: one allocation, no naked owning pointer.
    return std::make_shared<ItemSet>(items);
}
Describe_Only(item_sets) {
Grammar grammar = test_grammars::arithmetic();
It(computes_the_closure_of_an_item_set_under_symbol_expansion) {
Item item = Item::at_beginning_of_rule("term", grammar);
ItemSet item_set = ItemSet({ item }).closure_in_grammar(grammar);
Item item = Item::at_beginning_of_rule("expression", grammar);
ItemSet set = ItemSet(item, grammar);
AssertThat(
item_set,
EqualsContainer(ItemSet({
Item("term", grammar.rule("term"), 0),
Item("factor", grammar.rule("factor"), 0),
Item("variable", grammar.rule("variable"), 0),
Item("number", grammar.rule("number"), 0),
Item("left_paren", grammar.rule("left_paren"), 0),
})));
set,
EqualsContainer(ItemSet({
Item("expression", grammar.rule("expression"), 0),
Item("term", grammar.rule("term"), 0),
Item("factor", grammar.rule("factor"), 0),
Item("variable", grammar.rule("variable"), 0),
Item("number", grammar.rule("number"), 0),
Item("left_paren", grammar.rule("left_paren"), 0),
})));
}
// Symbol transitions: builds the item set for an item at the beginning of the
// "factor" rule and checks that sym_transitions() yields exactly one entry per
// symbol listed below ("variable", "number", "left_paren"), each mapped to the
// item set expected after consuming that symbol.
It(computes_transitions) {
Item item = Item::at_beginning_of_rule("factor", grammar);
ItemSet set = ItemSet(item, grammar);
AssertThat(
set.sym_transitions(grammar),
EqualsContainer(TransitionMap<ItemSet>({
{ sym("variable"), item_set({ Item("factor", blank(), 1) }) },
{ sym("number"), item_set({ Item("factor", blank(), 1) }) },
// The "left_paren" target is built with the (Item, Grammar) constructor
// instead of an explicit item list.
{ sym("left_paren"), std::make_shared<ItemSet>(Item("factor", seq({ sym("expression"), sym("right_paren") }), 1), grammar) },
}), TransitionMap<ItemSet>::elements_equal));
}
// Character transitions: builds the same "factor" item set as the symbol
// transition spec, but checks char_transitions(). The expected map is keyed by
// character-level rules (the word class, the digit class and the literal '(')
// and each entry maps to a one-item set for the "variable", "number" or
// "left_paren" rule with a consumed count of 1.
It(computes_character_transitions) {
Item item = Item::at_beginning_of_rule("factor", grammar);
ItemSet set = ItemSet(item, grammar);
AssertThat(
set.char_transitions(grammar),
EqualsContainer(TransitionMap<ItemSet>({
{ char_class(CharClassTypeWord), item_set({ Item("variable", choice({ repeat(char_class(CharClassTypeWord)), blank() }), 1) }) },
{ char_class(CharClassTypeDigit), item_set({ Item("number", choice({ repeat(char_class(CharClassTypeDigit)), blank() }), 1) }) },
{ character('('), item_set({ Item("left_paren", blank(), 1) }) }
}), TransitionMap<ItemSet>::elements_equal));
}
};

View file

@ -15,7 +15,18 @@ Describe(pattern_rules) {
character('c')
})->to_string()));
};
// The pattern text is \w-\d (backslashes doubled for the C++ literal). Its rule
// tree is expected to print the same as a sequence of: the word character
// class, a literal '-', and the digit character class.
It(parses_character_classes) {
pattern_ptr rule = pattern("\\w-\\d");
AssertThat(
rule->to_rule_tree()->to_string(),
Equals(seq({
char_class(CharClassTypeWord),
character('-'),
char_class(CharClassTypeDigit)
})->to_string()));
};
It(parses_choices) {
pattern_ptr rule = pattern("ab|cd|ef");
AssertThat(

View file

@ -42,6 +42,15 @@ Describe(Rules) {
})));
}
// A character class rule is expected to have a single transition: on the
// class itself, leading to the blank rule.
It(handles_character_classes) {
auto rule = rules::char_class(rules::CharClassTypeDigit);
AssertThat(
rule->transitions(),
EqualsTransitionMap(TransitionMap<rules::Rule>({
{ rule, rules::blank() }
})));
}
It(handles_choices) {
AssertThat(
rules::choice({ symbol1, symbol2 })->transitions(),
@ -117,6 +126,18 @@ Describe(Rules) {
})
})
}})));
repeat = rules::repeat(rules::str("a"));
AssertThat(
repeat->transitions(),
EqualsTransitionMap(TransitionMap<rules::Rule>({
{
rules::character('a'),
rules::choice({
repeat,
rules::blank()
})
}})));
}
};
};

View file

@ -1,6 +1,6 @@
#include "spec_helper.h"
EqualsContainerConstraint<rule_tmap, rule_tmap_comparator> EqualsTransitionMap(const rule_tmap &expected) {
return EqualsContainer(expected, rule_tmap::elements_equal);
EqualsContainerConstraint<TransitionMap<rules::Rule>, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap<rules::Rule> &expected) {
return EqualsContainer(expected, TransitionMap<rules::Rule>::elements_equal);
}

View file

@ -14,7 +14,6 @@ using namespace std;
using namespace igloo;
// Assertion helpers for transition maps
typedef TransitionMap<rules::Rule> rule_tmap;
typedef bool (* rule_tmap_comparator)(const std::pair<rules::rule_ptr, rules::rule_ptr> &, const std::pair<rules::rule_ptr, rules::rule_ptr> &);
EqualsContainerConstraint<TransitionMap<rules::Rule>, rule_tmap_comparator> EqualsTransitionMap(const TransitionMap<rules::Rule> &expected);

View file

@ -5,7 +5,7 @@ namespace tree_sitter {
rules(rules),
start_rule_name(rules.begin()->first) {}
const rules::rule_ptr Grammar::rule(const std::string &name) {
const rules::rule_ptr Grammar::rule(const std::string &name) const {
auto iter = rules.find(name);
return (iter == rules.end()) ?
rules::rule_ptr(nullptr) :

View file

@ -11,7 +11,7 @@ namespace tree_sitter {
public:
Grammar(const rule_map_init_list &rules);
const rules::rule_ptr rule(const std::string &);
const rules::rule_ptr rule(const std::string &) const;
const std::string start_rule_name;
private:

View file

@ -10,13 +10,13 @@ namespace tree_sitter {
rule(rule),
consumed_sym_count(consumed_sym_count) {};
Item Item::at_beginning_of_rule(const std::string &rule_name, Grammar &grammar) {
Item Item::at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar) {
return Item(rule_name, grammar.rule(rule_name), 0);
}
TransitionMap<Item> Item::transitions() const {
return rule->transitions().map<Item>([&](rules::rule_ptr to_rule) {
return item_ptr(new Item(rule_name, to_rule, consumed_sym_count + 1));
return std::make_shared<Item>(rule_name, to_rule, consumed_sym_count + 1);
});
};
@ -24,16 +24,15 @@ namespace tree_sitter {
vector<rules::sym_ptr> result;
for (auto pair : rule->transitions()) {
shared_ptr<const rules::Symbol> sym = dynamic_pointer_cast<const rules::Symbol>(pair.first);
if (sym != nullptr) result.push_back(sym);
if (sym) result.push_back(sym);
}
return result;
}
bool Item::operator==(const Item &other) const {
return (
other.rule_name == rule_name &&
other.rule == rule &&
other.consumed_sym_count == consumed_sym_count);
bool rule_names_eq = other.rule_name == rule_name;
bool rules_eq = (*other.rule == *rule);
return rule_names_eq && rules_eq;
}
std::ostream& operator<<(ostream &stream, const Item &item) {

View file

@ -12,7 +12,7 @@ namespace tree_sitter {
class Item {
public:
Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count);
static Item at_beginning_of_rule(const std::string &rule_name, Grammar &grammar);
static Item at_beginning_of_rule(const std::string &rule_name, const Grammar &grammar);
TransitionMap<Item> transitions() const;
std::vector<rules::sym_ptr> next_symbols() const;

View file

@ -8,16 +8,12 @@ namespace tree_sitter {
namespace lr {
ItemSet::ItemSet(const vector<Item> &items) : contents(items) {}
ItemSet::ItemSet(const initializer_list<Item> &items) : contents(items) {}
TransitionMap<ItemSet> ItemSet::transitions() const {
return TransitionMap<ItemSet>();
}
bool vector_contains(vector<Item> items, lr::Item item) {
static bool vector_contains(vector<Item> items, lr::Item item) {
return (std::find(items.begin(), items.end(), item) != items.end());
}
void add_item(vector<Item> &vector, const Item &item, Grammar &grammar) {
static void add_item(vector<Item> &vector, const Item &item, const Grammar &grammar) {
if (!vector_contains(vector, item)) {
vector.push_back(item);
for (rules::sym_ptr rule : item.next_symbols()) {
@ -26,12 +22,51 @@ namespace tree_sitter {
}
}
}
// Builds the item list for a single starting item: seeds an empty vector and
// lets add_item append the item together with whatever add_item pulls in for
// it from the grammar.
static vector<Item> closure_in_grammar(const Item &item, const Grammar &grammar) {
    vector<Item> closure;
    add_item(closure, item, grammar);
    return closure;
}
// Constructs the set from one starting item; `contents` is initialized with
// the result of closure_in_grammar(item, grammar).
ItemSet::ItemSet(const Item &item, const Grammar &grammar) : contents(closure_in_grammar(item, grammar)) {}
// Computes the transitions of this item set on everything that is not a
// grammar symbol (the specs exercise characters and character classes).
//
// For every item in the set, the item's own transitions are filtered down to
// those whose trigger rule is NOT exactly a rules::Symbol, and each target
// item is turned into an ItemSet via the (Item, Grammar) constructor. The
// per-item maps are then merged into a single result.
//
// NOTE(review): the merge callback returns `left` and ignores `right`, so if
// merge invokes it when two items transition on the same rule (presumably it
// does — confirm against TransitionMap::merge), the second target set is
// dropped rather than combined with the first. Verify this is intended.
TransitionMap<ItemSet> ItemSet::char_transitions(const Grammar &grammar) const {
    TransitionMap<ItemSet> result;
    // Iterate by const reference; copying every Item is unnecessary here.
    for (const Item &item : *this) {
        auto item_transitions = item.transitions()
            .where([](const rules::rule_ptr &on_rule) -> bool {
                return typeid(*on_rule) != typeid(rules::Symbol);
            })
            .map<ItemSet>([&grammar](const item_ptr &next_item) -> item_set_ptr {
                return std::make_shared<ItemSet>(*next_item, grammar);
            });
        result.merge(item_transitions, [](const item_set_ptr &left, const item_set_ptr &) -> item_set_ptr {
            return left;
        });
    }
    return result;
}
ItemSet ItemSet::closure_in_grammar(Grammar &grammar) const {
vector<Item> items;
for (Item item : *this)
add_item(items, item, grammar);
return ItemSet(items);
// Computes the transitions of this item set on grammar symbols.
//
// For every item in the set, the item's own transitions are filtered down to
// those whose trigger rule is exactly a rules::Symbol, and each target item is
// turned into an ItemSet via the (Item, Grammar) constructor. The per-item
// maps are then merged into a single result.
//
// NOTE(review): the merge callback returns `left` and ignores `right`, so if
// merge invokes it when two items transition on the same symbol (presumably
// it does — confirm against TransitionMap::merge), the second target set is
// dropped rather than combined with the first. Verify this is intended.
TransitionMap<ItemSet> ItemSet::sym_transitions(const Grammar &grammar) const {
    TransitionMap<ItemSet> result;
    // Iterate by const reference; copying every Item is unnecessary here.
    for (const Item &item : *this) {
        auto item_transitions = item.transitions()
            .where([](const rules::rule_ptr &on_rule) -> bool {
                return typeid(*on_rule) == typeid(rules::Symbol);
            })
            .map<ItemSet>([&grammar](const item_ptr &next_item) -> item_set_ptr {
                return std::make_shared<ItemSet>(*next_item, grammar);
            });
        result.merge(item_transitions, [](const item_set_ptr &left, const item_set_ptr &) -> item_set_ptr {
            return left;
        });
    }
    return result;
}
// Two item sets are equal when their `contents` vectors compare equal, i.e.
// same items in the same order (element comparison uses Item::operator==).
bool ItemSet::operator==(const tree_sitter::lr::ItemSet &other) const {
return contents == other.contents;
}
#pragma mark - container

View file

@ -6,10 +6,14 @@
namespace tree_sitter {
namespace lr {
class ItemSet;
typedef std::shared_ptr<const ItemSet> item_set_ptr;
class ItemSet {
public:
ItemSet(const std::vector<Item> &items);
ItemSet(const std::initializer_list<Item> &items);
ItemSet(const Item &item, const Grammar &grammar);
typedef Item value_type;
typedef std::vector<Item>::const_iterator const_iterator;
@ -17,12 +21,14 @@ namespace tree_sitter {
const_iterator end() const;
size_t size() const;
ItemSet closure_in_grammar(Grammar &grammar) const;
TransitionMap<ItemSet> transitions() const;
TransitionMap<ItemSet> sym_transitions(const Grammar &grammar) const;
TransitionMap<ItemSet> char_transitions(const Grammar &grammar) const;
bool operator==(const ItemSet &other) const;
const std::vector<Item> contents;
};
typedef std::shared_ptr<const ItemSet> item_set_ptr;
std::ostream& operator<<(std::ostream &stream, const ItemSet &item_set);
}
}

View file

@ -9,6 +9,7 @@
#include "string.h"
#include "pattern.h"
#include "char.h"
#include "char_class.h"
#include "repeat.h"
#endif

View file

@ -14,7 +14,7 @@ namespace tree_sitter {
}
bool Blank::operator==(const Rule &rule) const {
return dynamic_cast<const Blank *>(&rule) != NULL;
return dynamic_cast<const Blank *>(&rule) != nullptr;
}
std::string Blank::to_string() const {

View file

@ -8,7 +8,7 @@ namespace tree_sitter {
namespace rules {
Char::Char(char value) : value(value) {};
char_ptr character(char value) {
rule_ptr character(char value) {
return std::make_shared<Char>(value);
}

View file

@ -15,8 +15,7 @@ namespace tree_sitter {
const char value;
};
typedef std::shared_ptr<const Char> char_ptr;
char_ptr character(char value);
rule_ptr character(char value);
}
}

33
src/rules/char_class.cpp Normal file
View file

@ -0,0 +1,33 @@
#include "char_class.h"
#include "blank.h"
#include "transition_map.h"
using namespace std;
namespace tree_sitter {
namespace rules {
// Stores which character class this rule stands for.
CharClass::CharClass(CharClassType value) : value(value) {};
// Factory: creates a shared CharClass rule for the given class type and
// returns it as a rule_ptr.
rule_ptr char_class(CharClassType type) {
return std::make_shared<CharClass>(type);
}
// A character class has exactly one transition: on a (freshly created) rule
// for the same class, leading to the blank rule.
TransitionMap<Rule> CharClass::transitions() const {
return TransitionMap<Rule>({{ char_class(value), blank() }});
}
// Equal only to another CharClass holding the same class type; any other kind
// of rule compares unequal.
bool CharClass::operator==(const Rule &rule) const {
    if (auto candidate = dynamic_cast<const CharClass *>(&rule))
        return candidate->value == value;
    return false;
}
// Returns a short bracketed tag naming this character class
// ("<digit>" or "<word>").
string CharClass::to_string() const {
    // No `default:` label on purpose, so the compiler can warn when a new
    // CharClassType enumerator is added without a matching case.
    switch (value) {
        case CharClassTypeDigit:
            return "<digit>";
        case CharClassTypeWord:
            return "<word>";
    }
    // Reached only if `value` is outside the known enumerators. Without this
    // return, control would flow off the end of a non-void function, which is
    // undefined behavior.
    return "<unknown>";
}
}
}

27
src/rules/char_class.h Normal file
View file

@ -0,0 +1,27 @@
#ifndef __tree_sitter__char_class__
#define __tree_sitter__char_class__
#include "rule.h"
namespace tree_sitter {
namespace rules {
// The kinds of character class a CharClass rule can represent.
enum CharClassType {
    CharClassTypeWord,
    CharClassTypeDigit
};
// Rule that stands for a whole class of characters (see CharClassType)
// rather than one literal character.
class CharClass : public Rule {
public:
CharClass(CharClassType type);
// Transitions available from this rule.
TransitionMap<Rule> transitions() const;
// Compares this rule against any other Rule.
bool operator==(const Rule& other) const;
// Textual form of the rule.
std::string to_string() const;
private:
// Which character class this rule represents; fixed at construction.
const CharClassType value;
};
// Factory function returning a rule_ptr for the given character class type.
rule_ptr char_class(CharClassType value);
}
}
#endif

View file

@ -6,7 +6,10 @@ namespace tree_sitter {
Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {};
rule_ptr choice(const std::initializer_list<rule_ptr> &rules) {
return build_binary_rule_tree<Choice>(rules);
rule_ptr result;
for (auto rule : rules)
result = result.get() ? std::make_shared<Choice>(result, rule) : rule;
return result;
}
TransitionMap<Rule> Choice::transitions() const {

View file

@ -56,7 +56,7 @@ namespace tree_sitter {
break;
case '\\':
next();
result = character(peek());
result = escaped_char(peek());
next();
break;
default:
@ -67,6 +67,21 @@ namespace tree_sitter {
return result;
}
// Translates the character that follows a backslash in a pattern into a rule.
// Escaped parentheses stand for themselves; 'w' and 'd' select the word and
// digit character classes. Anything else is reported through error() and
// yields an empty rule_ptr.
rule_ptr escaped_char(char value) {
    if (value == '(' || value == ')')
        return character(value);
    if (value == 'w')
        return char_class(CharClassTypeWord);
    if (value == 'd')
        return char_class(CharClassTypeDigit);

    error("unrecognized escape sequence");
    return rule_ptr();
}
void next() {
position++;
}
@ -103,7 +118,8 @@ namespace tree_sitter {
}
bool Pattern::operator ==(tree_sitter::rules::Rule const &other) const {
return false;
auto pattern = dynamic_cast<const Pattern *>(&other);
return pattern && (pattern->value == value);
}
std::string Pattern::to_string() const {

View file

@ -21,14 +21,6 @@ namespace tree_sitter {
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
template <typename RuleClass>
rule_ptr build_binary_rule_tree(const std::initializer_list<rule_ptr> &rules) {
rule_ptr result;
for (auto rule : rules)
result = result.get() ? std::make_shared<RuleClass>(result, rule) : rule;
return result;
}
}
}

View file

@ -7,7 +7,12 @@ namespace tree_sitter {
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {};
rule_ptr seq(const std::initializer_list<rule_ptr> &rules) {
return build_binary_rule_tree<Seq>(rules);
rule_ptr result;
for (auto rule : rules)
result = (result.get() && typeid(*result) != typeid(Blank)) ?
std::make_shared<Seq>(result, rule) :
rule;
return result;
}
TransitionMap<Rule> Seq::transitions() const {

View file

@ -60,6 +60,15 @@ namespace tree_sitter {
}
}
// Returns a new map holding only the entries whose key rule satisfies
// filter_fn. Entries are visited in this map's iteration order and passed to
// add() on the result unchanged.
TransitionMap<MappedType> where(std::function<bool(rule_ptr)> filter_fn) {
    TransitionMap<MappedType> filtered;
    // Bind by const reference: copying each pair would also copy the smart
    // pointers it holds on every iteration.
    for (const pair_type &entry : *this) {
        if (filter_fn(entry.first))
            filtered.add(entry.first, entry.second);
    }
    return filtered;
}
template<typename NewMappedType>
TransitionMap<NewMappedType> map(std::function<std::shared_ptr<const NewMappedType>(mapped_ptr)> map_fn) {
TransitionMap<NewMappedType> result;