Compute closure of item sets under symbol expansion

This commit is contained in:
Max Brunsfeld 2013-11-12 18:37:02 -08:00
parent 6bbaba3ef8
commit f7063ba1d8
23 changed files with 251 additions and 91 deletions

View file

@ -21,6 +21,9 @@
12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_spec.cpp */; };
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
1251209D18303CFB00C9B56A /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209C18303CFB00C9B56A /* rules.cpp */; };
125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; };
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; };
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; };
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; };
27A343CA69E17E0F9EBEDF1C /* Pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* Pattern.cpp */; };
@ -133,7 +136,12 @@
12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; };
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
1251209C18303CFB00C9B56A /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = "<group>"; };
1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = "<group>"; };
1251209F18307DEC00C9B56A /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = "<group>"; };
125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/test_grammars/arithmetic.h; sourceTree = SOURCE_ROOT; };
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/test_grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = "<group>"; };
12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; };
12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; };
12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = "<group>"; };
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; };
@ -185,6 +193,8 @@
1213061A182C84DF00FCF928 /* item.h */,
12130620182C85D300FCF928 /* item_set.cpp */,
12130621182C85D300FCF928 /* item_set.h */,
1251209E18307DEC00C9B56A /* parse_table.cpp */,
1251209F18307DEC00C9B56A /* parse_table.h */,
);
path = lr;
sourceTree = "<group>";
@ -194,6 +204,7 @@
children = (
1213061D182C857100FCF928 /* item_set_spec.cpp */,
12512092182F307C00C9B56A /* parse_table_spec.cpp */,
12D1369C18328C5A005F3369 /* item_spec.cpp */,
);
name = lr;
path = spec/lr;
@ -398,6 +409,16 @@
path = collections;
sourceTree = "<group>";
};
125120A118307FCA00C9B56A /* test_grammars */ = {
isa = PBXGroup;
children = (
125120A218307FFD00C9B56A /* arithmetic.h */,
125120A3183083BD00C9B56A /* arithmetic.cpp */,
);
name = test_grammars;
path = spec/test_grammars;
sourceTree = "<group>";
};
12E716F9181D010E0051A649 = {
isa = PBXGroup;
children = (
@ -432,6 +453,7 @@
12E71796181D02A80051A649 /* spec */ = {
isa = PBXGroup;
children = (
125120A118307FCA00C9B56A /* test_grammars */,
1214925C181E200B008E9BDA /* externals */,
1213061C182C854F00FCF928 /* lr */,
121492E9181E200B008E9BDA /* main.cpp */,
@ -494,13 +516,16 @@
buildActionMask = 2147483647;
files = (
12130614182C3A1700FCF928 /* seq.cpp in Sources */,
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */,
1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */,
1213061B182C84DF00FCF928 /* item.cpp in Sources */,
1251209D18303CFB00C9B56A /* rules.cpp in Sources */,
12130617182C3D2900FCF928 /* string.cpp in Sources */,
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */,
1213060E182C398300FCF928 /* choice.cpp in Sources */,
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */,
125120A018307DEC00C9B56A /* parse_table.cpp in Sources */,
1214930E181E200B008E9BDA /* main.cpp in Sources */,
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */,
12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */,

View file

@ -1,21 +1,23 @@
#include "spec_helper.h"
#include "../test_grammars/arithmetic.h"
using namespace tree_sitter::lr;
Describe(item_sets) {
Describe(transitions) {
Grammar grammar = Grammar({
"one",
"two"
}, {
rules::sym("one"),
rules::sym("two")
});
Grammar grammar = test_grammars::arithmetic();
It(computes_the_closure_of_an_item_set_under_symbol_expansion) {
Item item = Item::at_beginning_of_rule("term", grammar);
ItemSet item_set = ItemSet({ item }).closure_in_grammar(grammar);
rules::rule_ptr rule = grammar.rules[string("one")];
lr::Item item = lr::Item(string("one"), rule, 0);
It(works) {
lr::ItemSet item_set = lr::ItemSet(item, grammar);
item_set.transitions();
}
};
AssertThat(
item_set,
EqualsContainer(ItemSet({
Item("term", grammar.rules["term"], 0),
Item("factor", grammar.rules["factor"], 0),
Item("variable", grammar.rules["variable"], 0),
Item("number", grammar.rules["number"], 0),
Item("left_paren", grammar.rules["left_paren"], 0),
})));
}
};

15
spec/lr/item_spec.cpp Normal file
View file

@ -0,0 +1,15 @@
#include "spec_helper.h"
#include "../test_grammars/arithmetic.h"
using namespace tree_sitter::lr;
// Specs for building lr::Item values from the arithmetic test grammar.
Describe(items) {
    Describe(transitions) {
        Grammar grammar = test_grammars::arithmetic();

        It(finds_the_item_at_the_start_of_a_rule) {
            // The item we expect: the "expression" rule with a consumed symbol count of 0.
            Item expected("expression", grammar.rules["expression"], 0);
            Item start_item = Item::at_beginning_of_rule("expression", grammar);
            AssertThat(start_item, Equals(expected));
        }
    };
};

View file

@ -1,43 +1,7 @@
#include "spec_helper.h"
Describe(parse_table_construction) {
Grammar grammar = Grammar(
{
"expression",
"term",
"factor",
"number",
"variable",
"plus",
"times",
"left_paren",
"right_paren"
}, {
rules::choice({
rules::seq({
rules::sym("term"),
rules::sym("plus"),
rules::sym("term") }),
rules::sym("term") }),
rules::choice({
rules::seq({
rules::sym("factor"),
rules::sym("times"),
rules::sym("factor") }),
rules::sym("factor") }),
rules::choice({
rules::sym("variable"),
rules::sym("number"),
rules::seq({
rules::sym("left_paren"),
rules::sym("expression"),
rules::sym("right_paren") }) }),
rules::pattern("\\d+"),
rules::pattern("\\w+"),
rules::str("+"),
rules::str("*"),
rules::str("("),
rules::str(")")
}
);
Describe(the_starting_state) {
};
};

View file

@ -11,11 +11,11 @@ Describe(Rules) {
It(constructs_binary_trees) {
AssertThat(
rules::seq({ symbol1, symbol2, symbol3 })->to_string(),
Equals(std::string("(seq 1 (seq 2 3))")));
Equals(std::string("(seq (sym '1') (seq (sym '2') (sym '3')))")));
AssertThat(
rules::choice({ symbol1, symbol2, symbol3 })->to_string(),
Equals(std::string("(choice 1 (choice 2 3))")));
Equals(std::string("(choice (sym '1') (choice (sym '2') (sym '3')))")));
}
};

View file

@ -1,5 +1,6 @@
#include "spec_helper.h"
// Assertion helper: builds the container constraint that compares a transition
// map against `expected`, using rule_tmap::elements_equal to compare elements.
EqualsContainerConstraint<rule_tmap, rule_tmap_comparator> EqualsTransitionMap(const rule_tmap &expected) {
return EqualsContainer(expected, rule_tmap::elements_equal);
}

View file

@ -3,14 +3,15 @@
#include "igloo/igloo_alt.h"
#include "transition_map.h"
#include "rule.h"
#include "rules.h"
#include "item.h"
#include "item_set.h"
#include "grammar.h"
using namespace igloo;
using namespace tree_sitter;
using namespace std;
using namespace igloo;
// Assertion helpers for transition maps
typedef TransitionMap<rules::Rule> rule_tmap;

View file

@ -0,0 +1,47 @@
#include "arithmetic.h"
#include "rules.h"
using namespace tree_sitter;
using namespace tree_sitter::rules;
namespace test_grammars {
    // Builds the arithmetic grammar used as a fixture by the specs.
    //
    // The grammar is handed to Grammar as two parallel lists: the Nth name
    // labels the Nth rule, so the two lists below must stay in the same order.
    Grammar arithmetic() {
        // expression: term plus term | term
        const auto expression_rule = choice({
            seq({ sym("term"), sym("plus"), sym("term") }),
            sym("term") });

        // term: factor times factor | factor
        const auto term_rule = choice({
            seq({ sym("factor"), sym("times"), sym("factor") }),
            sym("factor") });

        // factor: variable | number | left_paren expression right_paren
        const auto factor_rule = choice({
            sym("variable"),
            sym("number"),
            seq({ sym("left_paren"), sym("expression"), sym("right_paren") }) });

        // Terminals.
        const auto number_rule = pattern("\\d+");
        const auto variable_rule = pattern("\\w+");
        const auto plus_rule = str("+");
        const auto times_rule = str("*");
        const auto left_paren_rule = str("(");
        const auto right_paren_rule = str(")");

        return Grammar({
            "expression",
            "term",
            "factor",
            "number",
            "variable",
            "plus",
            "times",
            "left_paren",
            "right_paren"
        }, {
            expression_rule,
            term_rule,
            factor_rule,
            number_rule,
            variable_rule,
            plus_rule,
            times_rule,
            left_paren_rule,
            right_paren_rule
        });
    }
}

View file

@ -0,0 +1,10 @@
#ifndef TreeSitter_arithmetic_h
#define TreeSitter_arithmetic_h
#include "grammar.h"
namespace test_grammars {
// Returns a freshly built arithmetic grammar (expression / term / factor plus
// their terminals) for use as a spec fixture.
tree_sitter::Grammar arithmetic();
}
#endif

View file

@ -6,15 +6,21 @@ namespace tree_sitter {
// Constructs a grammar from an already-built rule map and the name of the
// rule to start from; both arguments are copied into the grammar.
Grammar::Grammar(const rule_map &rules, const std::string &start_rule_name) :
rules(rules),
start_rule_name(start_rule_name) {};
Grammar::Grammar(const initializer_list<string> &rule_names,
const initializer_list<rules::rule_ptr> &rule_vals) {
rules = rule_map();
std::unordered_map<std::string, rules::rule_ptr> build_rule_map(const initializer_list<string> &rule_names,
const initializer_list<rules::rule_ptr> &rule_vals) {
std::unordered_map<std::string, rules::rule_ptr> result;
auto rule_name_i = rule_names.begin();
auto rule_i = rule_vals.begin();
start_rule_name = *rule_name_i;
for (; rule_i != rule_vals.end(); rule_i++ && rule_name_i++) {
rules[*rule_name_i] = *rule_i;
while (rule_i != rule_vals.end()) {
result[*rule_name_i] = *rule_i;
rule_i++;
rule_name_i++;
}
return result;
}
// Constructs a grammar from two parallel lists: `names` holds the rule names
// and `values` the corresponding rules. The first name becomes the start rule.
Grammar::Grammar(const initializer_list<string> &names, const initializer_list<rules::rule_ptr> &values) :
    rules(build_rule_map(names, values)),
    // An empty name list has no first element, so dereferencing begin() would
    // be undefined behaviour; fall back to an empty start rule name instead.
    start_rule_name(names.size() == 0 ? string() : *names.begin()) {}
}

View file

@ -12,7 +12,7 @@ namespace tree_sitter {
Grammar(const std::initializer_list<std::string> &rule_names,
const std::initializer_list<rules::rule_ptr> &rules);
rule_map rules;
std::string start_rule_name;
const std::string start_rule_name;
};
}

View file

@ -1,4 +1,5 @@
#include "item.h"
#include "grammar.h"
using namespace std;
@ -9,16 +10,22 @@ namespace tree_sitter {
rule(rule),
consumed_sym_count(consumed_sym_count) {};
// Creates the item for the rule called `rule_name` in `grammar`, with a
// consumed symbol count of 0.
//
// If the grammar has no rule with that name, the returned item carries an
// empty rule pointer and the grammar is left unmodified.
Item Item::at_beginning_of_rule(const std::string &rule_name, Grammar &grammar) {
    // Use find() rather than operator[]: operator[] would silently insert an
    // empty entry into the grammar's rule map for every unknown name.
    auto entry = grammar.rules.find(rule_name);
    if (entry == grammar.rules.end())
        return Item(rule_name, rules::rule_ptr(), 0);
    return Item(rule_name, entry->second, 0);
}
// Returns the transitions of this item: each transition of the underlying rule
// is mapped to a new Item that keeps this item's rule name, holds the
// transition's destination rule, and has a consumed symbol count one higher.
TransitionMap<Item> Item::transitions() const {
return rule->transitions().map<Item>([&](rules::rule_ptr to_rule) {
return item_ptr(new Item(rule_name, to_rule, consumed_sym_count + 1));
});
};
vector<rules::rule_ptr> Item::next_symbols() const {
vector<rules::rule_ptr> result;
for (auto pair : rule->transitions())
result.push_back(pair.second);
vector<rules::sym_ptr> Item::next_symbols() const {
vector<rules::sym_ptr> result;
for (auto pair : rule->transitions()) {
shared_ptr<const rules::Symbol> sym = dynamic_pointer_cast<const rules::Symbol>(pair.first);
if (sym != nullptr) result.push_back(sym);
}
return result;
}

View file

@ -6,12 +6,16 @@
#include "transition_map.h"
namespace tree_sitter {
class Grammar;
namespace lr {
class Item {
public:
Item(const std::string &rule_name, const rules::rule_ptr rule, int consumed_sym_count);
static Item at_beginning_of_rule(const std::string &rule_name, Grammar &grammar);
TransitionMap<Item> transitions() const;
std::vector<rules::rule_ptr> next_symbols() const;
std::vector<rules::sym_ptr> next_symbols() const;
bool operator==(const Item &other) const;
const std::string rule_name;
@ -19,7 +23,7 @@ namespace tree_sitter {
const int consumed_sym_count;
};
typedef std::shared_ptr<Item> item_ptr;
typedef std::shared_ptr<const Item> item_ptr;
std::ostream& operator<<(std::ostream &stream, const Item &item);
}
}

View file

@ -1,13 +1,62 @@
#include "item_set.h"
#include <algorithm>
#include <iostream>
using namespace std;
namespace tree_sitter {
namespace lr {
ItemSet::ItemSet(const Item &item, const Grammar &grammar) {
}
// Builds an item set holding a copy of the given items, in the given order.
ItemSet::ItemSet(const vector<Item> &items) : contents(items) {}
// Builds an item set from a braced list of items, in the given order.
ItemSet::ItemSet(const initializer_list<Item> &items) : contents(items) {}
// Transitions out of this item set.
// NOTE(review): currently a stub that always returns an empty transition map.
TransitionMap<ItemSet> ItemSet::transitions() const {
return TransitionMap<ItemSet>();
}
bool vector_contains(vector<Item> items, lr::Item item) {
return (std::find(items.begin(), items.end(), item) != items.end());
}
void add_item(vector<Item> &vector, const Item &item, Grammar &grammar) {
if (!vector_contains(vector, item)) {
vector.push_back(item);
for (rules::sym_ptr rule : item.next_symbols()) {
Item next_item = Item::at_beginning_of_rule(rule->name, grammar);
add_item(vector, next_item, grammar);
}
}
}
// Returns a new item set containing every item of this set plus, transitively,
// the items at the beginning of the rules named by their next symbols.
ItemSet ItemSet::closure_in_grammar(Grammar &grammar) const {
    vector<Item> closed_items;
    for (const_iterator it = begin(); it != end(); ++it)
        add_item(closed_items, *it, grammar);
    return ItemSet(closed_items);
}
#pragma mark - container
// Iterator to the first item in `contents`.
ItemSet::const_iterator ItemSet::begin() const {
return contents.begin();
}
// Past-the-end iterator for `contents`.
ItemSet::const_iterator ItemSet::end() const {
return contents.end();
}
// Number of items held in `contents`.
size_t ItemSet::size() const {
return contents.size();
}
#pragma mark - printing
// Writes the set as "(item_set " followed by each item and a trailing space,
// then ")". Returns the stream to allow chaining.
ostream& operator<<(ostream &stream, const ItemSet &item_set) {
    stream << string("(item_set ");
    for (ItemSet::const_iterator it = item_set.begin(); it != item_set.end(); ++it)
        stream << *it << string(" ");
    return stream << string(")");
}
}
}
}

View file

@ -8,9 +8,22 @@ namespace tree_sitter {
namespace lr {
// A collection of LR items, stored in a vector and exposed as a read-only
// container.
class ItemSet {
public:
ItemSet(const Item &item, const Grammar &grammar);
// Build a set holding copies of the given items, in the given order.
ItemSet(const std::vector<Item> &items);
ItemSet(const std::initializer_list<Item> &items);
// Read-only container interface over `contents`.
typedef Item value_type;
typedef std::vector<Item>::const_iterator const_iterator;
const_iterator begin() const;
const_iterator end() const;
size_t size() const;
// Returns a new set with these items plus, transitively, the items at the
// beginning of the rules named by their next symbols.
ItemSet closure_in_grammar(Grammar &grammar) const;
TransitionMap<ItemSet> transitions() const;
// The items themselves; fixed at construction.
const std::vector<Item> contents;
};
std::ostream& operator<<(std::ostream &stream, const ItemSet &item_set);
}
}

1
src/lr/parse_table.cpp Normal file
View file

@ -0,0 +1 @@
#include "parse_table.h"

13
src/lr/parse_table.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef __TreeSitter__parse_table__
#define __TreeSitter__parse_table__
namespace tree_sitter {
namespace lr {
// LR parse table. Currently has no state or behaviour beyond construction.
class ParseTable {
public:
    // Defaulted inline: parse_table.cpp contains only the include and defines
    // no constructor, so a declaration-only constructor would fail to link as
    // soon as a ParseTable is instantiated.
    ParseTable() = default;
};
}
}
#endif

View file

@ -1,11 +1,4 @@
#include "rules.h"
#include "blank.h"
#include "symbol.h"
#include "choice.h"
#include "seq.h"
#include "string.h"
#include "pattern.h"
#include "char.h"
namespace tree_sitter {
namespace rules {

View file

@ -1,7 +1,14 @@
#ifndef __TreeSitter__rules__
#define __TreeSitter__rules__
#include "rules/rule.h"
#include "rule.h"
#include "blank.h"
#include "symbol.h"
#include "choice.h"
#include "seq.h"
#include "string.h"
#include "pattern.h"
#include "char.h"
namespace tree_sitter {
namespace rules {
@ -12,6 +19,8 @@ namespace tree_sitter {
rule_ptr pattern(const std::string &value);
rule_ptr seq(const std::initializer_list<rule_ptr> &rules);
rule_ptr choice(const std::initializer_list<rule_ptr> &rules);
typedef std::shared_ptr<const Symbol> sym_ptr;
}
}

View file

@ -24,7 +24,7 @@ namespace tree_sitter {
}
std::string String::to_string() const {
return value;
return std::string("(string '") + value + "')";
}
}
}

View file

@ -17,7 +17,7 @@ namespace tree_sitter {
}
std::string Symbol::to_string() const {
return name;
return std::string("(sym '") + name + "')";
}
}
}

View file

@ -13,8 +13,7 @@ namespace tree_sitter {
Symbol * copy() const;
bool operator==(const Rule& other) const;
std::string to_string() const;
private:
std::string name;
const std::string name;
};
}
}

View file

@ -92,7 +92,8 @@ namespace tree_sitter {
bool started = false;
for (auto pair : map) {
if (started) stream << std::string(", ");
stream << (pair.first->to_string() + " => " + pair.second->to_string());
stream << pair.first->to_string() << std::string(" => ");
stream << *pair.second;
started = true;
}
stream << std::string("]");