Separate auxiliary rules from user-specified rules
This commit is contained in:
parent
19e5b2a563
commit
fd0d77ef8b
16 changed files with 741 additions and 412 deletions
|
|
@ -84,7 +84,10 @@ namespace tree_sitter {
|
|||
{}
|
||||
|
||||
string symbol_id(rules::Symbol symbol) {
|
||||
return "ts_symbol_" + symbol.name;
|
||||
if (symbol.is_auxiliary)
|
||||
return "ts_aux_" + symbol.name;
|
||||
else
|
||||
return "ts_symbol_" + symbol.name;
|
||||
}
|
||||
|
||||
string character_code(char character) {
|
||||
|
|
|
|||
|
|
@ -16,11 +16,18 @@ namespace tree_sitter {
|
|||
rules(rules),
|
||||
start_rule_name(start_rule_name) {}
|
||||
|
||||
Grammar::Grammar(std::string start_rule_name, rule_map &rules, rule_map &aux_rules) :
|
||||
rules(rules),
|
||||
aux_rules(aux_rules),
|
||||
start_rule_name(start_rule_name) {}
|
||||
|
||||
const rules::rule_ptr Grammar::rule(const rules::Symbol &symbol) const {
|
||||
auto iter = rules.find(symbol.name);
|
||||
return (iter == rules.end()) ?
|
||||
rules::rule_ptr(nullptr) :
|
||||
iter->second;
|
||||
auto map = symbol.is_auxiliary ? aux_rules : rules;
|
||||
auto iter = map.find(symbol.name);
|
||||
if (iter != map.end())
|
||||
return iter->second;
|
||||
else
|
||||
return rules::rule_ptr();
|
||||
}
|
||||
|
||||
vector<string> Grammar::rule_names() const {
|
||||
|
|
@ -34,21 +41,30 @@ namespace tree_sitter {
|
|||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.start_rule_name != start_rule_name) return false;
|
||||
if (other.rules.size() != rules.size()) return false;
|
||||
if (other.aux_rules.size() != aux_rules.size()) return false;
|
||||
|
||||
for (auto pair : rules) {
|
||||
auto other_pair = other.rules.find(pair.first);
|
||||
if (other_pair == other.rules.end()) return false;
|
||||
auto orr = other_pair->second->to_string();;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
for (auto pair : aux_rules) {
|
||||
auto other_pair = other.aux_rules.find(pair.first);
|
||||
if (other_pair == other.aux_rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Grammar::has_definition(const rules::Symbol &symbol) const {
|
||||
return rules.find(symbol.name) != rules.end();
|
||||
return rule(symbol).get() != nullptr;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const Grammar &grammar) {
|
||||
stream << string("#<grammar: ");
|
||||
stream << string("#<grammar");
|
||||
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules) {
|
||||
if (started) stream << string(", ");
|
||||
|
|
@ -57,6 +73,19 @@ namespace tree_sitter {
|
|||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
stream << string(" aux_rules: {");
|
||||
started = false;
|
||||
for (auto pair : grammar.aux_rules) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
return stream << string(">");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,16 +8,21 @@
|
|||
namespace tree_sitter {
|
||||
class Grammar {
|
||||
typedef std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> rule_map_init_list;
|
||||
typedef const std::unordered_map<std::string, const rules::rule_ptr> rule_map;
|
||||
|
||||
public:
|
||||
Grammar(const rule_map_init_list &rules);
|
||||
Grammar(std::string start_rule_name, const std::unordered_map<std::string, const rules::rule_ptr> &rules);
|
||||
const rules::rule_ptr rule(const rules::Symbol &) const;
|
||||
Grammar(std::string start_rule_name, rule_map &rules);
|
||||
Grammar(std::string start_rule_name, rule_map &rules, rule_map &aux_rules);
|
||||
|
||||
const std::string start_rule_name;
|
||||
std::vector<std::string> rule_names() const;
|
||||
bool operator==(const Grammar &other) const;
|
||||
bool has_definition(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
|
||||
const std::unordered_map<std::string, const rules::rule_ptr> rules;
|
||||
rule_map rules;
|
||||
rule_map aux_rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ namespace tree_sitter {
|
|||
rule_ptr value;
|
||||
unordered_map<string, const rule_ptr> aux_rules;
|
||||
|
||||
rule_ptr apply(const rule_ptr rule) {
|
||||
rule_ptr apply(const rule_ptr &rule) {
|
||||
rule->accept(*this);
|
||||
return value;
|
||||
}
|
||||
|
|
@ -21,7 +21,7 @@ namespace tree_sitter {
|
|||
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
|
||||
return seq({
|
||||
rule,
|
||||
choice({ sym(name), blank() })
|
||||
choice({ aux_sym(name), blank() })
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -29,7 +29,7 @@ namespace tree_sitter {
|
|||
rule_ptr inner_rule = apply(rule->content);
|
||||
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
|
||||
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
|
||||
value = sym(helper_rule_name);
|
||||
value = aux_sym(helper_rule_name);
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
|
|
@ -48,11 +48,11 @@ namespace tree_sitter {
|
|||
Grammar expand_repeats(const Grammar &grammar) {
|
||||
unordered_map<string, const rule_ptr> result;
|
||||
RepeatExpander visitor;
|
||||
|
||||
for (auto pair : grammar.rules)
|
||||
result.insert({ pair.first, visitor.apply(pair.second) });
|
||||
for (auto pair : visitor.aux_rules)
|
||||
result.insert(pair);
|
||||
return Grammar(grammar.start_rule_name, result);
|
||||
|
||||
return Grammar(grammar.start_rule_name, result, visitor.aux_rules);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ namespace tree_sitter {
|
|||
return value;
|
||||
} else {
|
||||
string token_name = add_token(rule);
|
||||
return sym(token_name);
|
||||
return aux_sym(token_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ namespace tree_sitter {
|
|||
for (auto pair : tokens)
|
||||
if (*pair.second == *rule)
|
||||
return pair.first;
|
||||
string name = to_string(tokens.size() + 1);
|
||||
string name = "token" + to_string(tokens.size() + 1);
|
||||
tokens.insert({ name, rule });
|
||||
return name;
|
||||
}
|
||||
|
|
@ -62,6 +62,7 @@ namespace tree_sitter {
|
|||
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
|
||||
TokenExtractor extractor;
|
||||
unordered_map<string, const rule_ptr> rules;
|
||||
unordered_map<string, const rule_ptr> aux_rules;
|
||||
unordered_map<string, const rule_ptr> tokens;
|
||||
|
||||
for (auto pair : input_grammar.rules) {
|
||||
|
|
@ -73,13 +74,20 @@ namespace tree_sitter {
|
|||
else
|
||||
tokens.insert({ name, rule });
|
||||
}
|
||||
|
||||
for (auto pair : extractor.tokens)
|
||||
tokens.insert(pair);
|
||||
|
||||
|
||||
for (auto pair : input_grammar.aux_rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
auto new_rule = extractor.initial_apply(rule);
|
||||
if (new_rule.get())
|
||||
aux_rules.insert({ name, new_rule });
|
||||
else
|
||||
tokens.insert({ name, rule });
|
||||
}
|
||||
|
||||
return {
|
||||
Grammar(input_grammar.start_rule_name, rules),
|
||||
Grammar("", tokens)
|
||||
Grammar("", tokens, extractor.tokens)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,10 @@ using std::pair;
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
|
||||
auto rule_grammar = expand_repeats(input_grammar);
|
||||
return prepare_grammar::extract_tokens(rule_grammar);
|
||||
auto grammars = prepare_grammar::extract_tokens(input_grammar);
|
||||
auto rule_grammar = expand_repeats(grammars.first);
|
||||
auto lex_grammar = grammars.second;
|
||||
return { rule_grammar, lex_grammar };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
rule_ptr sym(const string &name) {
|
||||
return make_shared<Symbol>(name);
|
||||
return make_shared<Symbol>(name, false);
|
||||
}
|
||||
|
||||
rule_ptr aux_sym(const string &name) {
|
||||
return make_shared<Symbol>(name, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ namespace tree_sitter {
|
|||
rule_ptr seq(const std::initializer_list<rule_ptr> &rules);
|
||||
rule_ptr str(const std::string &value);
|
||||
rule_ptr sym(const std::string &name);
|
||||
rule_ptr aux_sym(const std::string &name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,15 +5,16 @@ using std::hash;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Symbol::Symbol(const std::string &name) : name(name) {};
|
||||
Symbol::Symbol(const std::string &name) : name(name), is_auxiliary(false) {};
|
||||
Symbol::Symbol(const std::string &name, bool is_auxiliary) : name(name), is_auxiliary(is_auxiliary) {};
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
const Symbol *other = dynamic_cast<const Symbol *>(&rule);
|
||||
return other && (other->name == name);
|
||||
return other && (other->name == name) && (other->is_auxiliary == is_auxiliary);
|
||||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
return typeid(this).hash_code() ^ hash<string>()(name);
|
||||
return typeid(this).hash_code() ^ hash<string>()(name) ^ hash<bool>()(is_auxiliary);
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const {
|
||||
|
|
@ -21,11 +22,15 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
string Symbol::to_string() const {
|
||||
return string("#<sym '") + name + "'>";
|
||||
return is_auxiliary ?
|
||||
string("#<aux_sym '") + name + "'>" :
|
||||
string("#<sym '") + name + "'>";
|
||||
}
|
||||
|
||||
bool Symbol::operator<(const Symbol &other) const {
|
||||
return name < other.name;
|
||||
if (name < other.name) return true;
|
||||
if (other.name < name) return false;
|
||||
return is_auxiliary < other.is_auxiliary;
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor &visitor) const {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ namespace tree_sitter {
|
|||
class Symbol : public Rule {
|
||||
public:
|
||||
Symbol(const std::string &name);
|
||||
Symbol(const std::string &name, bool is_auxiliary);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
@ -17,6 +18,7 @@ namespace tree_sitter {
|
|||
bool operator<(const Symbol &other) const;
|
||||
|
||||
std::string name;
|
||||
bool is_auxiliary;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue