diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 52cbbd93..3a0a0cdd 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -5,9 +5,9 @@ enum ts_symbol { ts_symbol_expression, ts_symbol_factor, ts_symbol_term, - ts_symbol_times, - ts_symbol_2, ts_symbol_1, + ts_symbol_2, + ts_symbol_times, ts_symbol_plus, ts_symbol_number, ts_symbol___END__, @@ -18,9 +18,9 @@ static const char *ts_symbol_names[] = { "expression", "factor", "term", - "times", - "2", "1", + "2", + "times", "plus", "number", "__END__", diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index 898e132f..a4e7f189 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -8,13 +8,13 @@ enum ts_symbol { ts_symbol_repeat_helper2, ts_symbol_array, ts_symbol___END__, - ts_symbol_number, - ts_symbol_6, + ts_symbol_1, + ts_symbol_2, + ts_symbol_4, ts_symbol_7, ts_symbol_5, - ts_symbol_2, - ts_symbol_1, - ts_symbol_4, + ts_symbol_6, + ts_symbol_number, ts_symbol_3, ts_symbol_string, }; @@ -26,13 +26,13 @@ static const char *ts_symbol_names[] = { "repeat_helper2", "array", "__END__", - "number", - "6", + "1", + "2", + "4", "7", "5", - "2", - "1", - "4", + "6", + "number", "3", "string", }; diff --git a/spec/runtime/json_spec.cpp b/spec/runtime/json_spec.cpp index 8eb810da..10a9c116 100644 --- a/spec/runtime/json_spec.cpp +++ b/spec/runtime/json_spec.cpp @@ -20,7 +20,7 @@ describe("json", []() { it("parses objects", [&]() { TSDocumentSetText(document, "{\"key1\":1}"); - AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))")); + AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (2) (6)))")); TSDocumentSetText(document, "{\"key1\":1,\"key2\":2}"); AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))")); diff --git 
a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 5b42cd65..30391b57 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -6,76 +6,77 @@ using std::pair; using std::string; using std::to_string; using std::unordered_map; +using namespace tree_sitter::rules; namespace tree_sitter { namespace prepare_grammar { - class TokenExtractor : rules::Visitor { + class TokenExtractor : Visitor { public: - rules::rule_ptr value; - size_t anonymous_token_count = 0; - unordered_map<string, rules::rule_ptr> tokens; + rule_ptr value; + unordered_map<string, rule_ptr> tokens; - rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) { - auto result = apply(rule); - auto symbol = std::dynamic_pointer_cast<rules::Symbol>(result); - if (symbol && *symbol != *rule) { - tokens.insert({ name, tokens[symbol->name] }); - tokens.erase(symbol->name); - anonymous_token_count--; - return rules::rule_ptr(); + rule_ptr initial_apply(const rule_ptr rule) { + if (!search_for_symbols(rule)) { + return rule_ptr(); } else { - return result; + return apply(rule); } } - rules::rule_ptr apply(const rules::rule_ptr rule) { + rule_ptr apply(const rule_ptr rule) { if (search_for_symbols(rule)) { rule->accept(*this); return value; } else { string token_name = add_token(rule); - return rules::sym(token_name); + return sym(token_name); } } - string add_token(const rules::rule_ptr &rule) { + string add_token(const rule_ptr &rule) { for (auto pair : tokens) if (*pair.second == *rule) return pair.first; - string name = to_string(++anonymous_token_count); + string name = to_string(tokens.size() + 1); tokens.insert({ name, rule }); return name; } - void default_visit(const rules::Rule *rule) { + void default_visit(const Rule *rule) { value = rule->copy(); } - void visit(const rules::Choice *choice) { - value = rules::choice({ apply(choice->left), apply(choice->right) }); + void visit(const Choice *rule) { + value = choice({ 
apply(rule->left), apply(rule->right) }); } - void visit(const rules::Seq *seq) { - value = rules::seq({ apply(seq->left), apply(seq->right) }); + void visit(const Seq *rule) { + value = seq({ apply(rule->left), apply(rule->right) }); } }; pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) { TokenExtractor extractor; - unordered_map<string, rules::rule_ptr> rules; + unordered_map<string, rule_ptr> rules; + unordered_map<string, rule_ptr> tokens; for (auto pair : input_grammar.rules) { string name = pair.first; - auto new_rule = extractor.initial_apply(name, pair.second); + rule_ptr rule = pair.second; + auto new_rule = extractor.initial_apply(rule); if (new_rule.get()) rules.insert({ name, new_rule }); + else + tokens.insert({ name, rule }); } - extractor.tokens.insert({ "__END__", rules::character('\0') }); + for (auto pair : extractor.tokens) + tokens.insert(pair); + tokens.insert({ "__END__", character('\0') }); return { Grammar(input_grammar.start_rule_name, rules), - Grammar("", extractor.tokens) + Grammar("", tokens) }; } }