Separate auxiliary rules from user-specified rules

This commit is contained in:
Max Brunsfeld 2014-01-28 13:27:30 -08:00
parent 19e5b2a563
commit fd0d77ef8b
16 changed files with 741 additions and 412 deletions

View file

@ -84,7 +84,10 @@ namespace tree_sitter {
{}
// Builds the identifier used for a grammar symbol.
// Auxiliary symbols are prefixed with "ts_aux_", all others with "ts_symbol_".
string symbol_id(rules::Symbol symbol) {
    if (symbol.is_auxiliary)
        return "ts_aux_" + symbol.name;
    else
        return "ts_symbol_" + symbol.name;
}
string character_code(char character) {

View file

@ -16,11 +16,18 @@ namespace tree_sitter {
rules(rules),
start_rule_name(start_rule_name) {}
// Constructs a grammar from a start rule name, the user-specified rules and
// a separate map of auxiliary rules.
// Initializers are listed in member declaration order (start_rule_name,
// rules, aux_rules), which is the order in which they actually run.
Grammar::Grammar(std::string start_rule_name, rule_map &rules, rule_map &aux_rules) :
    start_rule_name(start_rule_name),
    rules(rules),
    aux_rules(aux_rules) {}
// Looks up the rule stored under `symbol.name`.
// Auxiliary symbols are resolved against `aux_rules`, all other symbols
// against `rules`. Returns an empty rule_ptr when no entry exists.
const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const {
    // Bind by reference: a plain `auto` would copy the whole map on each call.
    const auto &map = symbol.is_auxiliary ? aux_rules : rules;
    auto iter = map.find(symbol.name);
    if (iter != map.end())
        return iter->second;
    return rules::rule_ptr();
}
vector<string> Grammar::rule_names() const {
@ -34,21 +41,30 @@ namespace tree_sitter {
// Two grammars are equal when they share the same start rule name and their
// `rules` and `aux_rules` maps hold equal rules under the same names.
bool Grammar::operator==(const Grammar &other) const {
    if (other.start_rule_name != start_rule_name) return false;
    if (other.rules.size() != rules.size()) return false;
    if (other.aux_rules.size() != aux_rules.size()) return false;

    // Sizes match, so checking that every entry of ours has an equal
    // counterpart in `other` is sufficient.
    for (const auto &pair : rules) {
        auto other_pair = other.rules.find(pair.first);
        if (other_pair == other.rules.end()) return false;
        if (!other_pair->second->operator==(*pair.second)) return false;
    }

    for (const auto &pair : aux_rules) {
        auto other_pair = other.aux_rules.find(pair.first);
        if (other_pair == other.aux_rules.end()) return false;
        if (!other_pair->second->operator==(*pair.second)) return false;
    }

    return true;
}
// Reports whether `rule(symbol)` yields a non-null rule for the symbol.
bool Grammar::has_definition(const rules::Symbol &symbol) const {
    return rule(symbol).get() != nullptr;
}
ostream& operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar: ");
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules) {
if (started) stream << string(", ");
@ -57,6 +73,19 @@ namespace tree_sitter {
stream << pair.second;
started = true;
}
stream << string("}");
stream << string(" aux_rules: {");
started = false;
for (auto pair : grammar.aux_rules) {
if (started) stream << string(", ");
stream << pair.first;
stream << string(" => ");
stream << pair.second;
started = true;
}
stream << string("}");
return stream << string(">");
}
}

View file

@ -8,16 +8,21 @@
namespace tree_sitter {
// A named set of rules plus a separate set of auxiliary rules.
class Grammar {
    typedef std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> rule_map_init_list;
    typedef const std::unordered_map<std::string, const rules::rule_ptr> rule_map;

public:
    Grammar(const rule_map_init_list &rules);
    Grammar(std::string start_rule_name, rule_map &rules);
    Grammar(std::string start_rule_name, rule_map &rules, rule_map &aux_rules);

    const std::string start_rule_name;
    std::vector<std::string> rule_names() const;
    bool operator==(const Grammar &other) const;
    bool has_definition(const rules::Symbol &symbol) const;
    const rules::rule_ptr rule(const rules::Symbol &symbol) const;

    // Rules keyed by name; auxiliary rules are kept in their own map.
    rule_map rules;
    rule_map aux_rules;
};
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);

View file

@ -13,7 +13,7 @@ namespace tree_sitter {
rule_ptr value;
unordered_map<string, const rule_ptr> aux_rules;
// Dispatches this visitor on `rule` and returns the `value` member afterwards.
// The rule is taken by const reference.
rule_ptr apply(const rule_ptr &rule) {
    rule->accept(*this);
    return value;
}
@ -21,7 +21,7 @@ namespace tree_sitter {
// Builds the body of a repeat helper rule: `rule` followed by either a
// reference to the auxiliary symbol `name` or a blank.
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
    return seq({
        rule,
        choice({ aux_sym(name), blank() })
    });
}
@ -29,7 +29,7 @@ namespace tree_sitter {
rule_ptr inner_rule = apply(rule->content);
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
value = sym(helper_rule_name);
value = aux_sym(helper_rule_name);
}
void visit(const Seq *rule) {
@ -48,11 +48,11 @@ namespace tree_sitter {
// Applies a RepeatExpander to every rule in `grammar.rules` and returns a new
// grammar with the same start rule name. The helper rules the visitor
// collected are passed as the auxiliary rule map, not merged into the
// regular rules.
Grammar expand_repeats(const Grammar &grammar) {
    unordered_map<string, const rule_ptr> result;
    RepeatExpander visitor;

    for (const auto &pair : grammar.rules)
        result.insert({ pair.first, visitor.apply(pair.second) });

    return Grammar(grammar.start_rule_name, result, visitor.aux_rules);
}
}
}

View file

@ -29,7 +29,7 @@ namespace tree_sitter {
return value;
} else {
string token_name = add_token(rule);
return sym(token_name);
return aux_sym(token_name);
}
}
@ -37,7 +37,7 @@ namespace tree_sitter {
for (auto pair : tokens)
if (*pair.second == *rule)
return pair.first;
string name = to_string(tokens.size() + 1);
string name = "token" + to_string(tokens.size() + 1);
tokens.insert({ name, rule });
return name;
}
@ -62,6 +62,7 @@ namespace tree_sitter {
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
TokenExtractor extractor;
unordered_map<string, const rule_ptr> rules;
unordered_map<string, const rule_ptr> aux_rules;
unordered_map<string, const rule_ptr> tokens;
for (auto pair : input_grammar.rules) {
@ -73,13 +74,20 @@ namespace tree_sitter {
else
tokens.insert({ name, rule });
}
for (auto pair : extractor.tokens)
tokens.insert(pair);
for (auto pair : input_grammar.aux_rules) {
string name = pair.first;
rule_ptr rule = pair.second;
auto new_rule = extractor.initial_apply(rule);
if (new_rule.get())
aux_rules.insert({ name, new_rule });
else
tokens.insert({ name, rule });
}
return {
Grammar(input_grammar.start_rule_name, rules),
Grammar("", tokens)
Grammar("", tokens, extractor.tokens)
};
}
}

View file

@ -7,8 +7,10 @@ using std::pair;
namespace tree_sitter {
namespace prepare_grammar {
// Prepares a grammar in two steps: tokens are extracted first, then repeats
// are expanded in the first grammar of the resulting pair.
// Returns { expanded rule grammar, second grammar from extract_tokens }.
pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
    auto grammars = prepare_grammar::extract_tokens(input_grammar);
    auto rule_grammar = expand_repeats(grammars.first);
    auto lex_grammar = grammars.second;
    return { rule_grammar, lex_grammar };
}
}
}

View file

@ -47,7 +47,11 @@ namespace tree_sitter {
}
// Creates a non-auxiliary Symbol rule with the given name.
rule_ptr sym(const string &name) {
    return make_shared<Symbol>(name, false);
}
// Creates an auxiliary Symbol rule with the given name.
rule_ptr aux_sym(const string &name) {
    const bool is_auxiliary = true;
    return make_shared<Symbol>(name, is_auxiliary);
}
}
}

View file

@ -24,6 +24,7 @@ namespace tree_sitter {
rule_ptr seq(const std::initializer_list<rule_ptr> &rules);
rule_ptr str(const std::string &value);
rule_ptr sym(const std::string &name);
rule_ptr aux_sym(const std::string &name);
}
}

View file

@ -5,15 +5,16 @@ using std::hash;
namespace tree_sitter {
namespace rules {
// Constructs a non-auxiliary symbol with the given name.
Symbol::Symbol(const std::string &name) : name(name), is_auxiliary(false) {}

// Constructs a symbol with the given name and an explicit auxiliary flag.
Symbol::Symbol(const std::string &name, bool is_auxiliary) : name(name), is_auxiliary(is_auxiliary) {}
// A rule equals this symbol only if it is itself a Symbol with the same
// name and the same auxiliary flag.
bool Symbol::operator==(const Rule &rule) const {
    const Symbol *other = dynamic_cast<const Symbol *>(&rule);
    return other && (other->name == name) && (other->is_auxiliary == is_auxiliary);
}
// XORs the type hash with the hashes of the name and the auxiliary flag, so
// every field compared by operator== also feeds the hash.
size_t Symbol::hash_code() const {
    return typeid(this).hash_code() ^ hash<string>()(name) ^ hash<bool>()(is_auxiliary);
}
rule_ptr Symbol::copy() const {
@ -21,11 +22,15 @@ namespace tree_sitter {
}
// Renders the symbol as "#<aux_sym 'NAME'>" when auxiliary and as
// "#<sym 'NAME'>" otherwise.
string Symbol::to_string() const {
    return is_auxiliary ?
        string("#<aux_sym '") + name + "'>" :
        string("#<sym '") + name + "'>";
}
// Orders symbols by name first; symbols with equal names are ordered by the
// auxiliary flag (non-auxiliary before auxiliary).
bool Symbol::operator<(const Symbol &other) const {
    if (name < other.name) return true;
    if (other.name < name) return false;
    return is_auxiliary < other.is_auxiliary;
}
void Symbol::accept(Visitor &visitor) const {

View file

@ -8,6 +8,7 @@ namespace tree_sitter {
class Symbol : public Rule {
public:
Symbol(const std::string &name);
Symbol(const std::string &name, bool is_auxiliary);
bool operator==(const Rule& other) const;
size_t hash_code() const;
@ -17,6 +18,7 @@ namespace tree_sitter {
bool operator<(const Symbol &other) const;
std::string name;
bool is_auxiliary;
};
}
}