Make separate functions for character-level and syntax-level rule transitions

This commit is contained in:
Max Brunsfeld 2014-02-09 14:31:27 -08:00
parent e92ac719f4
commit 905a408998
9 changed files with 109 additions and 92 deletions

View file

@ -7,32 +7,24 @@ using namespace build_tables;
START_TEST
describe("rule transitions", []() {
rule_ptr symbol1 = sym("1");
rule_ptr symbol2 = sym("2");
rule_ptr symbol3 = sym("3");
rule_ptr symbol4 = sym("4");
rule_ptr char1 = character('a');
auto symbol1 = sym("1");
auto symbol2 = sym("2");
auto symbol3 = sym("3");
auto symbol4 = sym("4");
auto char1 = character({ 'a' });
it("handles symbols", [&]() {
AssertThat(
rule_transitions(symbol1),
Equals(transition_map<Rule, Rule>({
sym_transitions(symbol1),
Equals(transition_map<Symbol, Rule>({
{ symbol1, blank() }
})));
});
it("handles characters", [&]() {
AssertThat(
rule_transitions(char1),
Equals(transition_map<Rule, Rule>({
{ char1, blank() }
})));
});
it("handles choices", [&]() {
AssertThat(
rule_transitions(choice({ symbol1, symbol2 })),
Equals(transition_map<Rule, Rule>({
sym_transitions(choice({ symbol1, symbol2 })),
Equals(transition_map<Symbol, Rule>({
{ symbol1, blank() },
{ symbol2, blank() }
})));
@ -40,77 +32,84 @@ describe("rule transitions", []() {
it("handles sequences", [&]() {
AssertThat(
rule_transitions(seq({ symbol1, symbol2 })),
Equals(transition_map<Rule, Rule>({
sym_transitions(seq({ symbol1, symbol2 })),
Equals(transition_map<Symbol, Rule>({
{ symbol1, symbol2 }
})));
});
it("handles_long_sequences", [&]() {
it("handles long sequences", [&]() {
AssertThat(
rule_transitions(seq({
sym_transitions(seq({
symbol1,
symbol2,
symbol3,
symbol4
})),
Equals(transition_map<Rule, Rule>({
Equals(transition_map<Symbol, Rule>({
{ symbol1, seq({ symbol2, symbol3, symbol4 }) }
})));
});
it("handles sequences whose left sides can be blank", [&]() {
AssertThat(
rule_transitions(seq({
sym_transitions(seq({
choice({
sym("x"),
symbol1,
blank(),
}),
seq({
sym("x"),
sym("y")
symbol1,
symbol2
})
})), Equals(transition_map<Rule, Rule>({
{ sym("x"), choice({ seq({ sym("x"), sym("y") }), sym("y"), }) }
})), Equals(transition_map<Symbol, Rule>({
{ symbol1, choice({ seq({ symbol1, symbol2 }), symbol2, }) }
})));
});
it("handles choices with common starting symbols", [&]() {
AssertThat(
rule_transitions(
sym_transitions(
choice({
seq({ symbol1, symbol2 }),
seq({ symbol1, symbol3 }) })),
Equals(transition_map<Rule, Rule>({
Equals(transition_map<Symbol, Rule>({
{ symbol1, choice({ symbol2, symbol3 }) }
})));
});
it("handles characters", [&]() {
AssertThat(
char_transitions(char1),
Equals(transition_map<CharacterSet, Rule>({
{ char1, blank() }
})));
});
it("handles strings", [&]() {
AssertThat(
rule_transitions(str("bad")),
Equals(transition_map<Rule, Rule>({
{ character('b'), seq({ character('a'), character('d') })
}
})));
char_transitions(str("bad")),
Equals(transition_map<CharacterSet, Rule>({
{ character({ 'b' }, true), seq({ character('a'), character('d') }) }
})));
});
it("handles patterns", [&]() {
AssertThat(
rule_transitions(pattern("a|b")),
Equals(transition_map<Rule, Rule>({
{ character('a'), blank() },
{ character('b'), blank() }
char_transitions(pattern("a|b")),
Equals(transition_map<CharacterSet, Rule>({
{ character({ 'a' }, true), blank() },
{ character({ 'b' }, true), blank() }
})));
});
it("handles repeats", [&]() {
rule_ptr rule = repeat(str("ab"));
AssertThat(
rule_transitions(rule),
Equals(transition_map<Rule, Rule>({
char_transitions(rule),
Equals(transition_map<CharacterSet, Rule>({
{
character('a'),
character({ 'a' }, true),
seq({
character('b'),
choice({
@ -122,10 +121,10 @@ describe("rule transitions", []() {
rule = repeat(str("a"));
AssertThat(
rule_transitions(rule),
Equals(transition_map<Rule, Rule>({
char_transitions(rule),
Equals(transition_map<CharacterSet, Rule>({
{
character('a'),
character({ 'a' }, true),
choice({
rule,
blank()
@ -143,14 +142,14 @@ describe("rule transitions", []() {
character('"'),
});
AssertThat(rule_transitions(rule), Equals(transition_map<Rule, Rule>({
AssertThat(char_transitions(rule), Equals(transition_map<CharacterSet, Rule>({
{ character({ '"' }, false), seq({
choice({
repeat(character({ '"' }, false)),
blank(),
}),
character('"'), }) },
{ character('"'), blank() },
{ character({ '"' }, true), blank() },
})));
});
});

View file

@ -15,14 +15,14 @@ namespace tree_sitter {
unordered_map<Symbol, set<Symbol>> follow_sets(const ParseItem &item, const Grammar &grammar) {
unordered_map<Symbol, set<Symbol>> result;
for (auto pair : rule_transitions(item.rule)) {
auto symbol = dynamic_pointer_cast<const rules::Symbol>(pair.first);
if (symbol && grammar.has_definition(*symbol)) {
for (auto pair : sym_transitions(item.rule)) {
auto symbol = *pair.first;
if (grammar.has_definition(symbol)) {
auto following_non_terminals = first_set(pair.second, grammar);
if (rule_can_be_blank(pair.second)) {
following_non_terminals.insert(item.lookahead_sym);
}
result.insert({ *symbol, following_non_terminals });
result.insert({ symbol, following_non_terminals });
}
}

View file

@ -21,13 +21,11 @@ namespace tree_sitter {
transition_map<CharacterSet, LexItemSet> result;
for (LexItem item : item_set) {
transition_map<CharacterSet, LexItemSet> item_transitions;
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const CharacterSet>(transition.first);
if (rule.get()) {
auto new_item = LexItem(item.lhs, transition.second);
auto new_item_set = LexItemSet({ new_item });
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
}
for (auto transition : char_transitions(item.rule)) {
auto rule = transition.first;
auto new_item = LexItem(item.lhs, transition.second);
auto new_item_set = LexItemSet({ new_item });
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
}
result.merge(item_transitions, [](shared_ptr<const LexItemSet> left, shared_ptr<const LexItemSet> right) -> shared_ptr<const LexItemSet> {
@ -42,15 +40,13 @@ namespace tree_sitter {
transition_map<rules::Symbol, ParseItemSet> result;
for (ParseItem item : item_set) {
transition_map<rules::Symbol, ParseItemSet> item_transitions;
for (auto transition : rule_transitions(item.rule)) {
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
if (rule.get()) {
auto consumed_symbols = item.consumed_symbols;
consumed_symbols.push_back(rule->is_auxiliary);
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}
for (auto transition : sym_transitions(item.rule)) {
auto rule = transition.first;
auto consumed_symbols = item.consumed_symbols;
consumed_symbols.push_back(rule->is_auxiliary);
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}
result.merge(item_transitions, [](shared_ptr<const ParseItemSet> left, shared_ptr<const ParseItemSet> right) -> shared_ptr<const ParseItemSet> {

View file

@ -9,41 +9,55 @@ namespace tree_sitter {
return typeid(*rule) == typeid(Blank);
}
template<typename T>
class TransitionsVisitor : public rules::Visitor {
public:
transition_map<Rule, Rule> value;
transition_map<T, Rule> value;
static transition_map<T, Rule> transitions(const rule_ptr rule) {
TransitionsVisitor<T> visitor;
rule->accept(visitor);
return visitor.value;
}
void visit_atom(const Rule *rule) {
auto atom = dynamic_cast<const T *>(rule);
if (atom) {
value = transition_map<T, Rule>({{ std::make_shared<const T>(*atom), blank() }});
}
}
void visit(const CharacterSet *rule) {
value = transition_map<Rule, Rule>({{ rule->copy(), blank() }});
visit_atom(rule);
}
void visit(const Symbol *rule) {
value = transition_map<Rule, Rule>({{ rule->copy(), blank() }});
visit_atom(rule);
}
void visit(const Choice *rule) {
value = rule_transitions(rule->left);
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
value = transitions(rule->left);
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
}
void visit(const Seq *rule) {
value = rule_transitions(rule->left).map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
value = transitions(rule->left).template map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
if (is_blank(left_rule))
return rule->right;
else
return seq({ left_rule, rule->right });
});
if (rule_can_be_blank(rule->left)) {
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
}
}
void visit(const Repeat *rule) {
value = rule_transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
value = transitions(rule->content).template map<Rule>([&](const rule_ptr &value) -> rule_ptr {
return seq({ value, choice({ rule->copy(), blank() }) });
});
}
@ -52,20 +66,22 @@ namespace tree_sitter {
rule_ptr result = character(rule->value[0]);
for (int i = 1; i < rule->value.length(); i++)
result = seq({ result, character(rule->value[i]) });
value = rule_transitions(result);
value = transitions(result);
}
void visit(const Pattern *rule) {
value = rule_transitions(rule->to_rule_tree());
value = transitions(rule->to_rule_tree());
}
};
transition_map<Rule, Rule> rule_transitions(const rule_ptr &rule) {
TransitionsVisitor visitor;
rule->accept(visitor);
return visitor.value;
transition_map<CharacterSet, Rule> char_transitions(const rule_ptr &rule) {
return TransitionsVisitor<CharacterSet>::transitions(rule);
}
transition_map<Symbol, Rule> sym_transitions(const rule_ptr &rule) {
return TransitionsVisitor<Symbol>::transitions(rule);
}
class EpsilonVisitor : public rules::Visitor {
public:
bool value;

View file

@ -1,13 +1,15 @@
#ifndef __tree_sitter__transitions__
#define __tree_sitter__transitions__
#include "rule.h"
#include "character_set.h"
#include "symbol.h"
#include "transition_map.h"
namespace tree_sitter {
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
transition_map<rules::Rule, rules::Rule> rule_transitions(const rules::rule_ptr &rule);
transition_map<rules::CharacterSet, rules::Rule> char_transitions(const rules::rule_ptr &rule);
transition_map<rules::Symbol, rules::Rule> sym_transitions(const rules::rule_ptr &rule);
}
}

View file

@ -50,6 +50,8 @@ namespace tree_sitter {
std::set<CharacterRange> ranges;
};
typedef std::shared_ptr<const CharacterSet> char_ptr;
}
}

View file

@ -11,16 +11,16 @@ namespace tree_sitter {
return make_shared<Blank>();
}
rule_ptr character(char value) {
char_ptr character(char value) {
set<CharacterRange> ranges = { value };
return make_shared<CharacterSet>(ranges);
}
rule_ptr character(const set<CharacterRange> &ranges) {
char_ptr character(const set<CharacterRange> &ranges) {
return make_shared<CharacterSet>(ranges);
}
rule_ptr character(const set<CharacterRange> &ranges, bool sign) {
char_ptr character(const set<CharacterRange> &ranges, bool sign) {
return make_shared<CharacterSet>(ranges, sign);
}
@ -52,7 +52,7 @@ namespace tree_sitter {
return make_shared<String>(value);
}
rule_ptr sym(const string &name) {
sym_ptr sym(const string &name) {
return make_shared<Symbol>(name, false);
}

View file

@ -16,16 +16,16 @@
namespace tree_sitter {
namespace rules {
rule_ptr blank();
rule_ptr character(char value);
rule_ptr character(const std::set<CharacterRange> &matches);
rule_ptr character(const std::set<CharacterRange> &matches, bool);
char_ptr character(char value);
char_ptr character(const std::set<CharacterRange> &matches);
char_ptr character(const std::set<CharacterRange> &matches, bool);
rule_ptr choice(const std::vector<rule_ptr> &rules);
rule_ptr pattern(const std::string &value);
rule_ptr repeat(const rule_ptr content);
rule_ptr seq(const std::vector<rule_ptr> &rules);
rule_ptr str(const std::string &value);
rule_ptr sym(const std::string &name);
sym_ptr sym(const std::string &name);
rule_ptr aux_sym(const std::string &name);
}
}

View file

@ -22,6 +22,8 @@ namespace tree_sitter {
std::string name;
bool is_auxiliary;
};
typedef std::shared_ptr<const Symbol> sym_ptr;
}
}