Make separate functions for character-level and syntax-level rule transitions
This commit is contained in:
parent
e92ac719f4
commit
905a408998
9 changed files with 109 additions and 92 deletions
|
|
@ -7,32 +7,24 @@ using namespace build_tables;
|
|||
START_TEST
|
||||
|
||||
describe("rule transitions", []() {
|
||||
rule_ptr symbol1 = sym("1");
|
||||
rule_ptr symbol2 = sym("2");
|
||||
rule_ptr symbol3 = sym("3");
|
||||
rule_ptr symbol4 = sym("4");
|
||||
rule_ptr char1 = character('a');
|
||||
auto symbol1 = sym("1");
|
||||
auto symbol2 = sym("2");
|
||||
auto symbol3 = sym("3");
|
||||
auto symbol4 = sym("4");
|
||||
auto char1 = character({ 'a' });
|
||||
|
||||
it("handles symbols", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(symbol1),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
sym_transitions(symbol1),
|
||||
Equals(transition_map<Symbol, Rule>({
|
||||
{ symbol1, blank() }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles characters", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(char1),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
{ char1, blank() }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles choices", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(choice({ symbol1, symbol2 })),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
sym_transitions(choice({ symbol1, symbol2 })),
|
||||
Equals(transition_map<Symbol, Rule>({
|
||||
{ symbol1, blank() },
|
||||
{ symbol2, blank() }
|
||||
})));
|
||||
|
|
@ -40,77 +32,84 @@ describe("rule transitions", []() {
|
|||
|
||||
it("handles sequences", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(seq({ symbol1, symbol2 })),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
sym_transitions(seq({ symbol1, symbol2 })),
|
||||
Equals(transition_map<Symbol, Rule>({
|
||||
{ symbol1, symbol2 }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles_long_sequences", [&]() {
|
||||
it("handles long sequences", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(seq({
|
||||
sym_transitions(seq({
|
||||
symbol1,
|
||||
symbol2,
|
||||
symbol3,
|
||||
symbol4
|
||||
})),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
Equals(transition_map<Symbol, Rule>({
|
||||
{ symbol1, seq({ symbol2, symbol3, symbol4 }) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles sequences whose left sides can be blank", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(seq({
|
||||
sym_transitions(seq({
|
||||
choice({
|
||||
sym("x"),
|
||||
symbol1,
|
||||
blank(),
|
||||
}),
|
||||
seq({
|
||||
sym("x"),
|
||||
sym("y")
|
||||
symbol1,
|
||||
symbol2
|
||||
})
|
||||
})), Equals(transition_map<Rule, Rule>({
|
||||
{ sym("x"), choice({ seq({ sym("x"), sym("y") }), sym("y"), }) }
|
||||
})), Equals(transition_map<Symbol, Rule>({
|
||||
{ symbol1, choice({ seq({ symbol1, symbol2 }), symbol2, }) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles choices with common starting symbols", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(
|
||||
sym_transitions(
|
||||
choice({
|
||||
seq({ symbol1, symbol2 }),
|
||||
seq({ symbol1, symbol3 }) })),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
Equals(transition_map<Symbol, Rule>({
|
||||
{ symbol1, choice({ symbol2, symbol3 }) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles characters", [&]() {
|
||||
AssertThat(
|
||||
char_transitions(char1),
|
||||
Equals(transition_map<CharacterSet, Rule>({
|
||||
{ char1, blank() }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles strings", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(str("bad")),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
{ character('b'), seq({ character('a'), character('d') })
|
||||
}
|
||||
})));
|
||||
char_transitions(str("bad")),
|
||||
Equals(transition_map<CharacterSet, Rule>({
|
||||
{ character({ 'b' }, true), seq({ character('a'), character('d') }) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles patterns", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(pattern("a|b")),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
{ character('a'), blank() },
|
||||
{ character('b'), blank() }
|
||||
char_transitions(pattern("a|b")),
|
||||
Equals(transition_map<CharacterSet, Rule>({
|
||||
{ character({ 'a' }, true), blank() },
|
||||
{ character({ 'b' }, true), blank() }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles repeats", [&]() {
|
||||
rule_ptr rule = repeat(str("ab"));
|
||||
AssertThat(
|
||||
rule_transitions(rule),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
char_transitions(rule),
|
||||
Equals(transition_map<CharacterSet, Rule>({
|
||||
{
|
||||
character('a'),
|
||||
character({ 'a' }, true),
|
||||
seq({
|
||||
character('b'),
|
||||
choice({
|
||||
|
|
@ -122,10 +121,10 @@ describe("rule transitions", []() {
|
|||
|
||||
rule = repeat(str("a"));
|
||||
AssertThat(
|
||||
rule_transitions(rule),
|
||||
Equals(transition_map<Rule, Rule>({
|
||||
char_transitions(rule),
|
||||
Equals(transition_map<CharacterSet, Rule>({
|
||||
{
|
||||
character('a'),
|
||||
character({ 'a' }, true),
|
||||
choice({
|
||||
rule,
|
||||
blank()
|
||||
|
|
@ -143,14 +142,14 @@ describe("rule transitions", []() {
|
|||
character('"'),
|
||||
});
|
||||
|
||||
AssertThat(rule_transitions(rule), Equals(transition_map<Rule, Rule>({
|
||||
AssertThat(char_transitions(rule), Equals(transition_map<CharacterSet, Rule>({
|
||||
{ character({ '"' }, false), seq({
|
||||
choice({
|
||||
repeat(character({ '"' }, false)),
|
||||
blank(),
|
||||
}),
|
||||
character('"'), }) },
|
||||
{ character('"'), blank() },
|
||||
{ character({ '"' }, true), blank() },
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -15,14 +15,14 @@ namespace tree_sitter {
|
|||
unordered_map<Symbol, set<Symbol>> follow_sets(const ParseItem &item, const Grammar &grammar) {
|
||||
unordered_map<Symbol, set<Symbol>> result;
|
||||
|
||||
for (auto pair : rule_transitions(item.rule)) {
|
||||
auto symbol = dynamic_pointer_cast<const rules::Symbol>(pair.first);
|
||||
if (symbol && grammar.has_definition(*symbol)) {
|
||||
for (auto pair : sym_transitions(item.rule)) {
|
||||
auto symbol = *pair.first;
|
||||
if (grammar.has_definition(symbol)) {
|
||||
auto following_non_terminals = first_set(pair.second, grammar);
|
||||
if (rule_can_be_blank(pair.second)) {
|
||||
following_non_terminals.insert(item.lookahead_sym);
|
||||
}
|
||||
result.insert({ *symbol, following_non_terminals });
|
||||
result.insert({ symbol, following_non_terminals });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,13 +21,11 @@ namespace tree_sitter {
|
|||
transition_map<CharacterSet, LexItemSet> result;
|
||||
for (LexItem item : item_set) {
|
||||
transition_map<CharacterSet, LexItemSet> item_transitions;
|
||||
for (auto transition : rule_transitions(item.rule)) {
|
||||
auto rule = dynamic_pointer_cast<const CharacterSet>(transition.first);
|
||||
if (rule.get()) {
|
||||
auto new_item = LexItem(item.lhs, transition.second);
|
||||
auto new_item_set = LexItemSet({ new_item });
|
||||
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
|
||||
}
|
||||
for (auto transition : char_transitions(item.rule)) {
|
||||
auto rule = transition.first;
|
||||
auto new_item = LexItem(item.lhs, transition.second);
|
||||
auto new_item_set = LexItemSet({ new_item });
|
||||
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
|
||||
}
|
||||
|
||||
result.merge(item_transitions, [](shared_ptr<const LexItemSet> left, shared_ptr<const LexItemSet> right) -> shared_ptr<const LexItemSet> {
|
||||
|
|
@ -42,15 +40,13 @@ namespace tree_sitter {
|
|||
transition_map<rules::Symbol, ParseItemSet> result;
|
||||
for (ParseItem item : item_set) {
|
||||
transition_map<rules::Symbol, ParseItemSet> item_transitions;
|
||||
for (auto transition : rule_transitions(item.rule)) {
|
||||
auto rule = dynamic_pointer_cast<const rules::Symbol>(transition.first);
|
||||
if (rule.get()) {
|
||||
auto consumed_symbols = item.consumed_symbols;
|
||||
consumed_symbols.push_back(rule->is_auxiliary);
|
||||
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
|
||||
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
|
||||
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
|
||||
}
|
||||
for (auto transition : sym_transitions(item.rule)) {
|
||||
auto rule = transition.first;
|
||||
auto consumed_symbols = item.consumed_symbols;
|
||||
consumed_symbols.push_back(rule->is_auxiliary);
|
||||
auto new_item = ParseItem(item.lhs, transition.second, consumed_symbols, item.lookahead_sym);
|
||||
auto new_item_set = item_set_closure(ParseItemSet({ new_item }), grammar);
|
||||
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
|
||||
}
|
||||
|
||||
result.merge(item_transitions, [](shared_ptr<const ParseItemSet> left, shared_ptr<const ParseItemSet> right) -> shared_ptr<const ParseItemSet> {
|
||||
|
|
|
|||
|
|
@ -9,41 +9,55 @@ namespace tree_sitter {
|
|||
return typeid(*rule) == typeid(Blank);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
class TransitionsVisitor : public rules::Visitor {
|
||||
public:
|
||||
transition_map<Rule, Rule> value;
|
||||
transition_map<T, Rule> value;
|
||||
|
||||
static transition_map<T, Rule> transitions(const rule_ptr rule) {
|
||||
TransitionsVisitor<T> visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
}
|
||||
|
||||
void visit_atom(const Rule *rule) {
|
||||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom) {
|
||||
value = transition_map<T, Rule>({{ std::make_shared<const T>(*atom), blank() }});
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const CharacterSet *rule) {
|
||||
value = transition_map<Rule, Rule>({{ rule->copy(), blank() }});
|
||||
visit_atom(rule);
|
||||
}
|
||||
|
||||
void visit(const Symbol *rule) {
|
||||
value = transition_map<Rule, Rule>({{ rule->copy(), blank() }});
|
||||
visit_atom(rule);
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = rule_transitions(rule->left);
|
||||
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
value = transitions(rule->left);
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = rule_transitions(rule->left).map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
|
||||
value = transitions(rule->left).template map<Rule>([&](const rule_ptr left_rule) -> rule_ptr {
|
||||
if (is_blank(left_rule))
|
||||
return rule->right;
|
||||
else
|
||||
return seq({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value = rule_transitions(rule->content).map<Rule>([&](const rule_ptr &value) -> rule_ptr {
|
||||
value = transitions(rule->content).template map<Rule>([&](const rule_ptr &value) -> rule_ptr {
|
||||
return seq({ value, choice({ rule->copy(), blank() }) });
|
||||
});
|
||||
}
|
||||
|
|
@ -52,20 +66,22 @@ namespace tree_sitter {
|
|||
rule_ptr result = character(rule->value[0]);
|
||||
for (int i = 1; i < rule->value.length(); i++)
|
||||
result = seq({ result, character(rule->value[i]) });
|
||||
value = rule_transitions(result);
|
||||
value = transitions(result);
|
||||
}
|
||||
|
||||
void visit(const Pattern *rule) {
|
||||
value = rule_transitions(rule->to_rule_tree());
|
||||
value = transitions(rule->to_rule_tree());
|
||||
}
|
||||
};
|
||||
|
||||
transition_map<Rule, Rule> rule_transitions(const rule_ptr &rule) {
|
||||
TransitionsVisitor visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
transition_map<CharacterSet, Rule> char_transitions(const rule_ptr &rule) {
|
||||
return TransitionsVisitor<CharacterSet>::transitions(rule);
|
||||
}
|
||||
|
||||
|
||||
transition_map<Symbol, Rule> sym_transitions(const rule_ptr &rule) {
|
||||
return TransitionsVisitor<Symbol>::transitions(rule);
|
||||
}
|
||||
|
||||
class EpsilonVisitor : public rules::Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
|
|
|||
|
|
@ -1,13 +1,15 @@
|
|||
#ifndef __tree_sitter__transitions__
|
||||
#define __tree_sitter__transitions__
|
||||
|
||||
#include "rule.h"
|
||||
#include "character_set.h"
|
||||
#include "symbol.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
transition_map<rules::Rule, rules::Rule> rule_transitions(const rules::rule_ptr &rule);
|
||||
transition_map<rules::CharacterSet, rules::Rule> char_transitions(const rules::rule_ptr &rule);
|
||||
transition_map<rules::Symbol, rules::Rule> sym_transitions(const rules::rule_ptr &rule);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ namespace tree_sitter {
|
|||
|
||||
std::set<CharacterRange> ranges;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const CharacterSet> char_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,16 +11,16 @@ namespace tree_sitter {
|
|||
return make_shared<Blank>();
|
||||
}
|
||||
|
||||
rule_ptr character(char value) {
|
||||
char_ptr character(char value) {
|
||||
set<CharacterRange> ranges = { value };
|
||||
return make_shared<CharacterSet>(ranges);
|
||||
}
|
||||
|
||||
rule_ptr character(const set<CharacterRange> &ranges) {
|
||||
char_ptr character(const set<CharacterRange> &ranges) {
|
||||
return make_shared<CharacterSet>(ranges);
|
||||
}
|
||||
|
||||
rule_ptr character(const set<CharacterRange> &ranges, bool sign) {
|
||||
char_ptr character(const set<CharacterRange> &ranges, bool sign) {
|
||||
return make_shared<CharacterSet>(ranges, sign);
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ namespace tree_sitter {
|
|||
return make_shared<String>(value);
|
||||
}
|
||||
|
||||
rule_ptr sym(const string &name) {
|
||||
sym_ptr sym(const string &name) {
|
||||
return make_shared<Symbol>(name, false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,16 +16,16 @@
|
|||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
rule_ptr blank();
|
||||
rule_ptr character(char value);
|
||||
rule_ptr character(const std::set<CharacterRange> &matches);
|
||||
rule_ptr character(const std::set<CharacterRange> &matches, bool);
|
||||
char_ptr character(char value);
|
||||
char_ptr character(const std::set<CharacterRange> &matches);
|
||||
char_ptr character(const std::set<CharacterRange> &matches, bool);
|
||||
|
||||
rule_ptr choice(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr repeat(const rule_ptr content);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &rules);
|
||||
rule_ptr str(const std::string &value);
|
||||
rule_ptr sym(const std::string &name);
|
||||
sym_ptr sym(const std::string &name);
|
||||
rule_ptr aux_sym(const std::string &name);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ namespace tree_sitter {
|
|||
std::string name;
|
||||
bool is_auxiliary;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Symbol> sym_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue