Refactor extract tokens function

Max Brunsfeld 2014-01-24 23:41:43 -08:00
parent 67fa81d079
commit b85b15db42
4 changed files with 42 additions and 41 deletions


@@ -5,9 +5,9 @@ enum ts_symbol {
     ts_symbol_expression,
     ts_symbol_factor,
     ts_symbol_term,
-    ts_symbol_times,
-    ts_symbol_2,
     ts_symbol_1,
+    ts_symbol_2,
+    ts_symbol_times,
     ts_symbol_plus,
     ts_symbol_number,
     ts_symbol___END__,
@@ -18,9 +18,9 @@ static const char *ts_symbol_names[] = {
     "expression",
     "factor",
     "term",
-    "times",
-    "2",
     "1",
+    "2",
+    "times",
     "plus",
     "number",
     "__END__",

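The reordered ts_symbol entries above reflect the new scheme for naming anonymous tokens in extract_tokens (see the last file below): each extracted rule is named by the number of tokens collected so far plus one, rather than by a separately maintained counter. A minimal standalone sketch of that naming rule, in plain standard C++ and independent of this repository (the example rules are hypothetical):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
        // Hypothetical inline rules extracted from a grammar, in extraction order.
        std::vector<std::pair<std::string, std::string>> tokens;
        for (const char *rule : { "\"*\"", "\"+\"" }) {
            // The refactored add_token derives each name from tokens.size() + 1.
            std::string name = std::to_string(tokens.size() + 1);
            tokens.push_back({ name, rule });
        }
        for (const auto &entry : tokens)
            std::cout << entry.first << " -> " << entry.second << "\n";  // 1 -> "*", 2 -> "+"
    }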

@@ -8,13 +8,13 @@ enum ts_symbol {
     ts_symbol_repeat_helper2,
     ts_symbol_array,
     ts_symbol___END__,
-    ts_symbol_number,
-    ts_symbol_6,
-    ts_symbol_1,
-    ts_symbol_2,
-    ts_symbol_4,
     ts_symbol_7,
     ts_symbol_5,
+    ts_symbol_2,
+    ts_symbol_1,
+    ts_symbol_4,
+    ts_symbol_6,
+    ts_symbol_number,
     ts_symbol_3,
     ts_symbol_string,
 };
@@ -26,13 +26,13 @@ static const char *ts_symbol_names[] = {
     "repeat_helper2",
     "array",
     "__END__",
-    "number",
-    "6",
-    "1",
-    "2",
-    "4",
     "7",
     "5",
+    "2",
+    "1",
+    "4",
+    "6",
+    "number",
     "3",
     "string",
 };


@@ -20,7 +20,7 @@ describe("json", []() {
     it("parses objects", [&]() {
         TSDocumentSetText(document, "{\"key1\":1}");
-        AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));
+        AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (2) (6)))"));
         TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}");
         AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));


@@ -6,76 +6,77 @@ using std::pair;
 using std::string;
 using std::to_string;
 using std::unordered_map;
+using namespace tree_sitter::rules;
 namespace tree_sitter {
 namespace prepare_grammar {
-class TokenExtractor : rules::Visitor {
+class TokenExtractor : Visitor {
 public:
-    rules::rule_ptr value;
-    size_t anonymous_token_count = 0;
-    unordered_map<string, const rules::rule_ptr> tokens;
+    rule_ptr value;
+    unordered_map<string, const rule_ptr> tokens;

-    rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) {
-        auto result = apply(rule);
-        auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(result);
-        if (symbol && *symbol != *rule) {
-            tokens.insert({ name, tokens[symbol->name] });
-            tokens.erase(symbol->name);
-            anonymous_token_count--;
-            return rules::rule_ptr();
+    rule_ptr initial_apply(const rule_ptr rule) {
+        if (!search_for_symbols(rule)) {
+            return rule_ptr();
         } else {
-            return result;
+            return apply(rule);
         }
     }
-    rules::rule_ptr apply(const rules::rule_ptr rule) {
+    rule_ptr apply(const rule_ptr rule) {
         if (search_for_symbols(rule)) {
             rule->accept(*this);
             return value;
         } else {
             string token_name = add_token(rule);
-            return rules::sym(token_name);
+            return sym(token_name);
         }
     }
-    string add_token(const rules::rule_ptr &rule) {
+    string add_token(const rule_ptr &rule) {
         for (auto pair : tokens)
             if (*pair.second == *rule)
                 return pair.first;
-        string name = to_string(++anonymous_token_count);
+        string name = to_string(tokens.size() + 1);
         tokens.insert({ name, rule });
         return name;
     }
-    void default_visit(const rules::Rule *rule) {
+    void default_visit(const Rule *rule) {
         value = rule->copy();
     }
-    void visit(const rules::Choice *choice) {
-        value = rules::choice({ apply(choice->left), apply(choice->right) });
+    void visit(const Choice *rule) {
+        value = choice({ apply(rule->left), apply(rule->right) });
     }
-    void visit(const rules::Seq *seq) {
-        value = rules::seq({ apply(seq->left), apply(seq->right) });
+    void visit(const Seq *rule) {
+        value = seq({ apply(rule->left), apply(rule->right) });
     }
 };
 pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
     TokenExtractor extractor;
-    unordered_map<string, const rules::rule_ptr> rules;
+    unordered_map<string, const rule_ptr> rules;
+    unordered_map<string, const rule_ptr> tokens;
     for (auto pair : input_grammar.rules) {
         string name = pair.first;
-        auto new_rule = extractor.initial_apply(name, pair.second);
+        rule_ptr rule = pair.second;
+        auto new_rule = extractor.initial_apply(rule);
         if (new_rule.get())
             rules.insert({ name, new_rule });
+        else
+            tokens.insert({ name, rule });
     }
-    extractor.tokens.insert({ "__END__", rules::character('\0') });
+    for (auto pair : extractor.tokens)
+        tokens.insert(pair);
+    tokens.insert({ "__END__", character('\0') });
     return {
         Grammar(input_grammar.start_rule_name, rules),
-        Grammar("", extractor.tokens)
+        Grammar("", tokens)
     };
 }
 }
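
For orientation, a hedged usage sketch of the refactored extract_tokens, not code from this commit. The Grammar constructor, the rules:: helpers (seq, sym, character), rule_ptr, and the public .rules member are the ones visible in the diff above; the include path and the toy grammar are assumptions made only for illustration:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    #include "prepare_grammar/extract_tokens.h"  // assumed header location

    using std::string;
    using std::unordered_map;
    using namespace tree_sitter;
    using namespace tree_sitter::rules;

    int main() {
        // "sum" mixes symbols with an inline '+' terminal; "number" is terminal-only.
        unordered_map<string, const rule_ptr> input_rules({
            { "sum", seq({ sym("number"), seq({ character('+'), sym("number") }) }) },
            { "number", character('9') },  // toy single-character terminal
        });
        Grammar arithmetic("sum", input_rules);

        auto result = prepare_grammar::extract_tokens(arithmetic);

        // result.first: the rule grammar; inside "sum", the inline '+' is replaced
        // by a reference to the generated token symbol "1".
        // result.second: the token grammar; "number" moves over under its own name
        // because it contains no symbols, "1" holds the extracted '+', and the
        // implicit "__END__" token (character '\0') is appended.
        for (const auto &entry : result.second.rules)
            std::cout << entry.first << "\n";  // "number", "1", "__END__", in map order
    }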