Refactor rule visitors
This commit is contained in:
parent
713b3899c5
commit
2c30dce08e
6 changed files with 100 additions and 138 deletions
|
|
@ -11,11 +11,10 @@ namespace tree_sitter {
|
|||
using namespace rules;
|
||||
|
||||
namespace build_tables {
|
||||
class FirstSetVisitor : Visitor {
|
||||
set<Symbol> value;
|
||||
class FirstSet : public RuleFn<set<Symbol>> {
|
||||
const PreparedGrammar grammar;
|
||||
|
||||
FirstSetVisitor(const PreparedGrammar &grammar) : grammar(grammar) {}
|
||||
public:
|
||||
FirstSet(const PreparedGrammar &grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> set_union(const set<Symbol> &left, const set<Symbol> &right) {
|
||||
set<Symbol> result = left;
|
||||
|
|
@ -25,33 +24,27 @@ namespace tree_sitter {
|
|||
|
||||
void visit(const Symbol *rule) {
|
||||
if (grammar.has_definition(*rule)) {
|
||||
value = apply(grammar.rule(*rule), grammar);
|
||||
value = apply(grammar.rule(*rule));
|
||||
} else {
|
||||
value = set<Symbol>({ *rule });
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
|
||||
value = set_union(apply(rule->left), apply(rule->right));
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = apply(rule->left, grammar);
|
||||
auto result = apply(rule->left);
|
||||
if (rule_can_be_blank(rule->left, grammar)) {
|
||||
value = set_union(value, apply(rule->right, grammar));
|
||||
result = set_union(result, apply(rule->right));
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static set<Symbol> apply(const rule_ptr rule, const PreparedGrammar &grammar) {
|
||||
FirstSetVisitor visitor(grammar);
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
value = result;
|
||||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
return FirstSetVisitor::apply(rule, grammar);
|
||||
return FirstSet(grammar).apply(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -11,41 +11,47 @@ namespace tree_sitter {
|
|||
using namespace rules;
|
||||
|
||||
namespace build_tables {
|
||||
class EpsilonVisitor : public rules::Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
||||
class CanBeBlank : public RuleFn<bool> {
|
||||
protected:
|
||||
void default_visit(const Rule *) {
|
||||
value = false;
|
||||
}
|
||||
|
||||
void visit(const Blank *) {
|
||||
virtual void visit(const Blank *) {
|
||||
value = true;
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = rule_can_be_blank(rule->left) || rule_can_be_blank(rule->right);
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = rule_can_be_blank(rule->left) && rule_can_be_blank(rule->right);
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
virtual void visit(const Repeat *rule) {
|
||||
value = true;
|
||||
}
|
||||
|
||||
virtual void visit(const Choice *rule) {
|
||||
value = apply(rule->left) || apply(rule->right);
|
||||
}
|
||||
|
||||
virtual void visit(const Seq *rule) {
|
||||
value = apply(rule->left) && apply(rule->right);
|
||||
}
|
||||
};
|
||||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const PreparedGrammar grammar;
|
||||
using CanBeBlank::visit;
|
||||
|
||||
public:
|
||||
CanBeBlankRecursive(const PreparedGrammar &grammar) : grammar(grammar) {}
|
||||
|
||||
void visit(const Symbol *rule) {
|
||||
value = grammar.has_definition(*rule) && apply(grammar.rule(*rule));
|
||||
}
|
||||
};
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule) {
|
||||
EpsilonVisitor visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
return CanBeBlank().apply(rule);
|
||||
}
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
if (rule_can_be_blank(rule)) return true;
|
||||
auto symbol = std::dynamic_pointer_cast<const Symbol>(rule);
|
||||
return (symbol.get() && grammar.has_definition(*symbol) && rule_can_be_blank(grammar.rule(*symbol), grammar));
|
||||
return CanBeBlankRecursive(grammar).apply(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@
|
|||
#include "rules/string.h"
|
||||
#include "rules/repeat.h"
|
||||
#include "rules/pattern.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/character_set.h"
|
||||
#include "rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::map;
|
||||
|
|
@ -17,16 +17,12 @@ namespace tree_sitter {
|
|||
using namespace rules;
|
||||
|
||||
namespace build_tables {
|
||||
bool is_blank(const rule_ptr &rule) {
|
||||
return typeid(*rule) == typeid(Blank);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
map<T, rule_ptr> merge_transitions(const map<T, rule_ptr> &left, const map<T, rule_ptr> &right);
|
||||
|
||||
template<>
|
||||
map<CharacterSet, rule_ptr> merge_transitions(const map<CharacterSet, rule_ptr> &left, const map<CharacterSet, rule_ptr> &right) {
|
||||
auto transitions = merge_char_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
auto transitions = merge_char_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) {
|
||||
return make_shared<Choice>(left, right);
|
||||
});
|
||||
return *static_cast<map<CharacterSet, rule_ptr> *>(&transitions);
|
||||
|
|
@ -34,7 +30,7 @@ namespace tree_sitter {
|
|||
|
||||
template<>
|
||||
map<Symbol, rule_ptr> merge_transitions(const map<Symbol, rule_ptr> &left, const map<Symbol, rule_ptr> &right) {
|
||||
auto transitions = merge_sym_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
auto transitions = merge_sym_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) {
|
||||
return make_shared<Choice>(left, right);
|
||||
});
|
||||
return *static_cast<map<Symbol, rule_ptr> *>(&transitions);
|
||||
|
|
@ -49,22 +45,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
template<typename T>
|
||||
class TransitionsVisitor : public rules::Visitor {
|
||||
public:
|
||||
map<T, rule_ptr> value;
|
||||
|
||||
static map<T, rule_ptr> transitions(const rule_ptr rule) {
|
||||
TransitionsVisitor<T> visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
}
|
||||
|
||||
class RuleTransitions : public RuleFn<map<T, rule_ptr>> {
|
||||
void visit_atom(const Rule *rule) {
|
||||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom) {
|
||||
value = map<T, rule_ptr>();
|
||||
value.insert({ *atom, make_shared<Blank>() });
|
||||
}
|
||||
if (atom)
|
||||
this->value = map<T, rule_ptr>({{ *atom, make_shared<Blank>() }});
|
||||
}
|
||||
|
||||
void visit(const CharacterSet *rule) {
|
||||
|
|
@ -76,24 +61,22 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = transitions(rule->left);
|
||||
value = merge_transitions<T>(transitions(rule->left), transitions(rule->right));
|
||||
this->value = merge_transitions<T>(this->apply(rule->left),
|
||||
this->apply(rule->right));
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = map_transitions(transitions(rule->left), [&](const rule_ptr left_rule) {
|
||||
auto result = map_transitions(this->apply(rule->left), [&](const rule_ptr left_rule) {
|
||||
return Seq::Build({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
value = merge_transitions<T>(value, transitions(rule->right));
|
||||
}
|
||||
if (rule_can_be_blank(rule->left))
|
||||
result = merge_transitions<T>(result, this->apply(rule->right));
|
||||
this->value = result;
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value = map_transitions(transitions(rule->content), [&](const rule_ptr &value) {
|
||||
return Seq::Build({
|
||||
value,
|
||||
make_shared<Choice>(rule->copy(), make_shared<Blank>()) });
|
||||
this->value = map_transitions(this->apply(rule->content), [&](const rule_ptr &value) {
|
||||
return Seq::Build({ value, make_shared<Choice>(rule->copy(), make_shared<Blank>()) });
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -101,20 +84,20 @@ namespace tree_sitter {
|
|||
rule_ptr result = make_shared<Blank>();
|
||||
for (char val : rule->value)
|
||||
result = Seq::Build({ result, make_shared<CharacterSet>(set<CharacterRange>({ val })) });
|
||||
value = transitions(result);
|
||||
this->value = this->apply(result);
|
||||
}
|
||||
|
||||
void visit(const Pattern *rule) {
|
||||
value = transitions(rule->to_rule_tree());
|
||||
this->value = this->apply(rule->to_rule_tree());
|
||||
}
|
||||
};
|
||||
|
||||
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
|
||||
return TransitionsVisitor<CharacterSet>::transitions(rule);
|
||||
return RuleTransitions<CharacterSet>().apply(rule);
|
||||
}
|
||||
|
||||
map<Symbol, rule_ptr> sym_transitions(const rule_ptr &rule) {
|
||||
return TransitionsVisitor<Symbol>::transitions(rule);
|
||||
return RuleTransitions<Symbol>().apply(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,16 +16,7 @@ namespace tree_sitter {
|
|||
using namespace rules;
|
||||
|
||||
namespace prepare_grammar {
|
||||
class RepeatExpander : rules::Visitor {
|
||||
public:
|
||||
rule_ptr value;
|
||||
map<const string, const rule_ptr> aux_rules;
|
||||
|
||||
rule_ptr apply(const rule_ptr &rule) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
}
|
||||
|
||||
class ExpandRepeats : public RuleFn<rule_ptr> {
|
||||
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
|
||||
return Choice::Build({
|
||||
Seq::Build({ rule, make_shared<Symbol>(name, SymbolTypeAuxiliary) }),
|
||||
|
|
@ -50,17 +41,19 @@ namespace tree_sitter {
|
|||
void default_visit(const Rule *rule) {
|
||||
value = rule->copy();
|
||||
}
|
||||
|
||||
public:
|
||||
map<const string, const rule_ptr> aux_rules;
|
||||
};
|
||||
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
|
||||
map<const string, const rule_ptr> rules;
|
||||
map<const string, const rule_ptr> aux_rules(grammar.aux_rules);
|
||||
RepeatExpander visitor;
|
||||
map<const string, const rule_ptr> rules, aux_rules(grammar.aux_rules);
|
||||
ExpandRepeats expander;
|
||||
|
||||
for (auto pair : grammar.rules)
|
||||
rules.insert({ pair.first, visitor.apply(pair.second) });
|
||||
for (auto &pair : grammar.rules)
|
||||
rules.insert({ pair.first, expander.apply(pair.second) });
|
||||
|
||||
aux_rules.insert(visitor.aux_rules.begin(), visitor.aux_rules.end());
|
||||
aux_rules.insert(expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
|
||||
return PreparedGrammar(grammar.start_rule_name, rules, aux_rules);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
#include "rules/repeat.h"
|
||||
#include "rules/blank.h"
|
||||
#include "rules/symbol.h"
|
||||
#include "rules/string.h"
|
||||
#include "rules/pattern.h"
|
||||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -18,10 +20,7 @@ namespace tree_sitter {
|
|||
using namespace rules;
|
||||
|
||||
namespace prepare_grammar {
|
||||
class TokenChecker : public Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
||||
class IsToken : public RuleFn<bool> {
|
||||
void default_visit(const Rule *rule) {
|
||||
value = false;
|
||||
}
|
||||
|
|
@ -34,36 +33,8 @@ namespace tree_sitter {
|
|||
value = true;
|
||||
}
|
||||
};
|
||||
|
||||
bool is_token(const rule_ptr &rule) {
|
||||
TokenChecker checker;
|
||||
rule->accept(checker);
|
||||
return checker.value;
|
||||
}
|
||||
|
||||
class TokenExtractor : Visitor {
|
||||
public:
|
||||
rule_ptr value;
|
||||
map<const string, const rule_ptr> tokens;
|
||||
|
||||
rule_ptr initial_apply(const rule_ptr rule) {
|
||||
if (is_token(rule)) {
|
||||
return rule_ptr();
|
||||
} else {
|
||||
return apply(rule);
|
||||
}
|
||||
}
|
||||
|
||||
rule_ptr apply(const rule_ptr rule) {
|
||||
if (!is_token(rule) || rule->operator==(Blank())) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
} else {
|
||||
string token_name = add_token(rule);
|
||||
return make_shared<Symbol>(token_name, SymbolTypeAuxiliary);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class TokenExtractor : public RuleFn<rule_ptr> {
|
||||
string add_token(const rule_ptr &rule) {
|
||||
for (auto pair : tokens)
|
||||
if (*pair.second == *rule)
|
||||
|
|
@ -72,9 +43,14 @@ namespace tree_sitter {
|
|||
tokens.insert({ name, rule });
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
void default_visit(const Rule *rule) {
|
||||
value = rule->copy();
|
||||
auto result = rule->copy();
|
||||
if (IsToken().apply(result)) {
|
||||
value = make_shared<Symbol>(add_token(result), SymbolTypeAuxiliary);
|
||||
} else {
|
||||
value = result;
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
|
|
@ -88,33 +64,31 @@ namespace tree_sitter {
|
|||
void visit(const Repeat *rule) {
|
||||
value = make_shared<Repeat>(apply(rule->content));
|
||||
}
|
||||
|
||||
public:
|
||||
map<const string, const rule_ptr> tokens;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
|
||||
map<const string, const rule_ptr> rules, tokens, aux_rules, aux_tokens;
|
||||
TokenExtractor extractor;
|
||||
map<const string, const rule_ptr> rules;
|
||||
map<const string, const rule_ptr> tokens;
|
||||
map<const string, const rule_ptr> aux_rules;
|
||||
map<const string, const rule_ptr> aux_tokens;
|
||||
|
||||
for (auto pair : input_grammar.rules) {
|
||||
for (auto &pair : input_grammar.rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
rule_ptr new_rule = extractor.initial_apply(rule);
|
||||
if (new_rule.get())
|
||||
rules.insert({ name, new_rule });
|
||||
else
|
||||
if (IsToken().apply(rule))
|
||||
tokens.insert({ name, rule });
|
||||
else
|
||||
rules.insert({ name, extractor.apply(rule) });
|
||||
}
|
||||
|
||||
for (auto pair : input_grammar.aux_rules) {
|
||||
for (auto &pair : input_grammar.aux_rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
rule_ptr new_rule = extractor.initial_apply(rule);
|
||||
if (new_rule.get())
|
||||
aux_rules.insert({ name, new_rule });
|
||||
else
|
||||
if (IsToken().apply(rule))
|
||||
aux_tokens.insert({ name, rule });
|
||||
else
|
||||
aux_rules.insert({ name, extractor.apply(rule) });
|
||||
}
|
||||
|
||||
aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
#ifndef __tree_sitter__rule_visitor__
|
||||
#define __tree_sitter__rule_visitor__
|
||||
|
||||
#include "./rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class Rule;
|
||||
class Blank;
|
||||
class Symbol;
|
||||
class CharacterSet;
|
||||
|
|
@ -25,6 +26,18 @@ namespace tree_sitter {
|
|||
virtual void visit(const String *rule);
|
||||
virtual void visit(const Pattern *rule);
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class RuleFn : public Visitor {
|
||||
protected:
|
||||
T value;
|
||||
public:
|
||||
T apply(const rule_ptr &rule) {
|
||||
value = T();
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue