From acad97cfd250482415d977a0a8d621189fa5145d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 19 Feb 2014 13:05:54 -0800 Subject: [PATCH] Separate concepts of hidden and auxiliary symbols This way, we can hide auxiliary symbols from library users, but still allow them to use hidden symbols --- examples/grammars/json.hpp | 15 +- examples/parsers/json.c | 190 +++++++++--------- include/tree_sitter/compiler.h | 2 +- spec/compiler/prepare_grammar_spec.cpp | 11 +- .../build_tables/item_set_transitions.cpp | 2 +- src/compiler/build_tables/perform.cpp | 4 +- src/compiler/generate_code/c_code.cpp | 2 +- src/compiler/grammar.cpp | 2 +- .../prepare_grammar/expand_repeats.cpp | 4 +- .../prepare_grammar/extract_tokens.cpp | 2 +- src/compiler/rules/rules.cpp | 6 +- src/compiler/rules/symbol.cpp | 32 ++- src/compiler/rules/symbol.h | 13 +- 13 files changed, 154 insertions(+), 131 deletions(-) diff --git a/examples/grammars/json.hpp b/examples/grammars/json.hpp index df965147..9a5e6cf5 100644 --- a/examples/grammars/json.hpp +++ b/examples/grammars/json.hpp @@ -11,7 +11,7 @@ namespace test_grammars { return choice({ seq({ rule, - repeat(seq({ aux_sym("comma"), rule })), + repeat(seq({ _sym("comma"), rule })), }), blank(), }); @@ -25,16 +25,16 @@ namespace test_grammars { sym("string"), sym("number") }) }, { "object", seq({ - aux_sym("left_brace"), + _sym("left_brace"), comma_sep(seq({ sym("string"), - aux_sym("colon"), + _sym("colon"), sym("value") })), - aux_sym("right_brace"), }) }, + _sym("right_brace"), }) }, { "array", seq({ - aux_sym("left_bracket"), + _sym("left_bracket"), comma_sep(sym("value")), - aux_sym("right_bracket"), }) }, + _sym("right_bracket"), }) }, { "string", seq({ character({ '"' }), repeat(choice({ @@ -42,8 +42,7 @@ namespace test_grammars { str("\\\""), })), character({ '"' }) }) }, - { "number", pattern("\\d+") } - }, { + { "number", pattern("\\d+") }, { "comma", str(",") }, { "colon", str(":") }, { "left_bracket", str("[") }, diff --git a/examples/parsers/json.c b/examples/parsers/json.c index 7b0671eb..08258c19 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -6,15 +6,15 @@ enum ts_symbol { ts_symbol_object, ts_symbol_string, ts_symbol_value, - ts_aux_colon, - ts_aux_comma, + ts_symbol_colon, + ts_symbol_comma, + ts_symbol_left_brace, + ts_symbol_left_bracket, + ts_symbol_right_brace, + ts_symbol_right_bracket, ts_aux_end, - ts_aux_left_brace, - ts_aux_left_bracket, ts_aux_repeat_helper1, ts_aux_repeat_helper2, - ts_aux_right_brace, - ts_aux_right_bracket, }; static const char *ts_symbol_names[] = { @@ -25,13 +25,13 @@ static const char *ts_symbol_names[] = { "value", "colon", "comma", - "end", "left_brace", "left_bracket", - "repeat_helper1", - "repeat_helper2", "right_brace", "right_bracket", + "end", + "repeat_helper1", + "repeat_helper2", }; static void ts_lex(TSParser *parser) { @@ -50,9 +50,9 @@ static void ts_lex(TSParser *parser) { ADVANCE(4); LEX_ERROR(2, EXPECT({",", "}"})); case 3: - ACCEPT_TOKEN(ts_aux_comma); + ACCEPT_TOKEN(ts_symbol_comma); case 4: - ACCEPT_TOKEN(ts_aux_right_brace); + ACCEPT_TOKEN(ts_symbol_right_brace); case 5: if (LOOKAHEAD_CHAR() == '}') ADVANCE(4); @@ -64,7 +64,7 @@ static void ts_lex(TSParser *parser) { ADVANCE(7); LEX_ERROR(2, EXPECT({",", "]"})); case 7: - ACCEPT_TOKEN(ts_aux_right_bracket); + ACCEPT_TOKEN(ts_symbol_right_bracket); case 8: if (LOOKAHEAD_CHAR() == ']') ADVANCE(7); @@ -136,15 +136,15 @@ static void ts_lex(TSParser *parser) { ADVANCE(16); ACCEPT_TOKEN(ts_symbol_number); case 17: - ACCEPT_TOKEN(ts_aux_left_bracket); + ACCEPT_TOKEN(ts_symbol_left_bracket); case 18: - ACCEPT_TOKEN(ts_aux_left_brace); + ACCEPT_TOKEN(ts_symbol_left_brace); case 19: if (LOOKAHEAD_CHAR() == ':') ADVANCE(20); LEX_ERROR(1, EXPECT({":"})); case 20: - ACCEPT_TOKEN(ts_aux_colon); + ACCEPT_TOKEN(ts_symbol_colon); case 21: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(10); @@ -189,9 +189,9 @@ static TSParseResult ts_parse(const char *input) { SHIFT(1); case ts_symbol_value: SHIFT(2); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(3); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(44); default: PARSE_PANIC(); @@ -217,7 +217,7 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(4); - case ts_aux_right_brace: + case ts_symbol_right_brace: SHIFT(43); default: PARSE_PANIC(); @@ -225,7 +225,7 @@ static TSParseResult ts_parse(const char *input) { case 4: SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_colon: + case ts_symbol_colon: SHIFT(5); default: PARSE_PANIC(); @@ -243,9 +243,9 @@ static TSParseResult ts_parse(const char *input) { SHIFT(6); case ts_symbol_value: SHIFT(7); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(13); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(19); default: PARSE_PANIC(); @@ -253,9 +253,9 @@ static TSParseResult ts_parse(const char *input) { case 6: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - case ts_aux_right_brace: + case ts_symbol_right_brace: REDUCE(ts_symbol_value, 1, COLLAPSE({0})); default: PARSE_PANIC(); @@ -263,12 +263,12 @@ static TSParseResult ts_parse(const char *input) { case 7: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(8); + case ts_symbol_right_brace: + REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); case ts_aux_repeat_helper2: SHIFT(41); - case ts_aux_right_brace: - REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); default: PARSE_PANIC(); } @@ -283,7 +283,7 @@ static TSParseResult ts_parse(const char *input) { case 9: SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_colon: + case ts_symbol_colon: SHIFT(10); default: PARSE_PANIC(); @@ -301,9 +301,9 @@ static TSParseResult ts_parse(const char *input) { SHIFT(6); case ts_symbol_value: SHIFT(11); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(13); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(19); default: PARSE_PANIC(); @@ -311,19 +311,19 @@ static TSParseResult ts_parse(const char *input) { case 11: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(8); + case ts_symbol_right_brace: + REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); case ts_aux_repeat_helper2: SHIFT(12); - case ts_aux_right_brace: - REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); default: PARSE_PANIC(); } case 12: SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_brace: + case ts_symbol_right_brace: REDUCE(ts_aux_repeat_helper2, 5, COLLAPSE({1, 0, 1, 0, 1})); default: PARSE_PANIC(); @@ -333,7 +333,7 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(14); - case ts_aux_right_brace: + case ts_symbol_right_brace: SHIFT(40); default: PARSE_PANIC(); @@ -341,7 +341,7 @@ static TSParseResult ts_parse(const char *input) { case 14: SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_colon: + case ts_symbol_colon: SHIFT(15); default: PARSE_PANIC(); @@ -359,9 +359,9 @@ static TSParseResult ts_parse(const char *input) { SHIFT(6); case ts_symbol_value: SHIFT(16); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(13); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(19); default: PARSE_PANIC(); @@ -369,19 +369,19 @@ static TSParseResult ts_parse(const char *input) { case 16: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(8); + case ts_symbol_right_brace: + REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); case ts_aux_repeat_helper2: SHIFT(17); - case ts_aux_right_brace: - REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); default: PARSE_PANIC(); } case 17: SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_brace: + case ts_symbol_right_brace: SHIFT(18); default: PARSE_PANIC(); @@ -389,9 +389,9 @@ static TSParseResult ts_parse(const char *input) { case 18: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); - case ts_aux_right_brace: + case ts_symbol_right_brace: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); @@ -409,11 +409,11 @@ static TSParseResult ts_parse(const char *input) { SHIFT(20); case ts_symbol_value: SHIFT(21); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(25); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(32); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: SHIFT(39); default: PARSE_PANIC(); @@ -421,9 +421,9 @@ static TSParseResult ts_parse(const char *input) { case 20: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: REDUCE(ts_symbol_value, 1, COLLAPSE({0})); default: PARSE_PANIC(); @@ -431,12 +431,12 @@ static TSParseResult ts_parse(const char *input) { case 21: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(22); + case ts_symbol_right_bracket: + REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); case ts_aux_repeat_helper1: SHIFT(37); - case ts_aux_right_bracket: - REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); default: PARSE_PANIC(); } @@ -453,9 +453,9 @@ static TSParseResult ts_parse(const char *input) { SHIFT(20); case ts_symbol_value: SHIFT(23); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(25); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(32); default: PARSE_PANIC(); @@ -463,19 +463,19 @@ static TSParseResult ts_parse(const char *input) { case 23: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(22); + case ts_symbol_right_bracket: + REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); case ts_aux_repeat_helper1: SHIFT(24); - case ts_aux_right_bracket: - REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); default: PARSE_PANIC(); } case 24: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_bracket: + case ts_symbol_right_bracket: REDUCE(ts_aux_repeat_helper1, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); @@ -485,7 +485,7 @@ static TSParseResult ts_parse(const char *input) { switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(26); - case ts_aux_right_brace: + case ts_symbol_right_brace: SHIFT(31); default: PARSE_PANIC(); @@ -493,7 +493,7 @@ static TSParseResult ts_parse(const char *input) { case 26: SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { - case ts_aux_colon: + case ts_symbol_colon: SHIFT(27); default: PARSE_PANIC(); @@ -511,9 +511,9 @@ static TSParseResult ts_parse(const char *input) { SHIFT(6); case ts_symbol_value: SHIFT(28); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(13); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(19); default: PARSE_PANIC(); @@ -521,19 +521,19 @@ static TSParseResult ts_parse(const char *input) { case 28: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(8); + case ts_symbol_right_brace: + REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); case ts_aux_repeat_helper2: SHIFT(29); - case ts_aux_right_brace: - REDUCE(ts_aux_repeat_helper2, 0, COLLAPSE({})); default: PARSE_PANIC(); } case 29: SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_brace: + case ts_symbol_right_brace: SHIFT(30); default: PARSE_PANIC(); @@ -541,9 +541,9 @@ static TSParseResult ts_parse(const char *input) { case 30: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); @@ -551,9 +551,9 @@ static TSParseResult ts_parse(const char *input) { case 31: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_object, 2, COLLAPSE({1, 1})); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: REDUCE(ts_symbol_object, 2, COLLAPSE({1, 1})); default: PARSE_PANIC(); @@ -571,11 +571,11 @@ static TSParseResult ts_parse(const char *input) { SHIFT(20); case ts_symbol_value: SHIFT(33); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(25); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(32); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: SHIFT(36); default: PARSE_PANIC(); @@ -583,19 +583,19 @@ static TSParseResult ts_parse(const char *input) { case 33: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(22); + case ts_symbol_right_bracket: + REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); case ts_aux_repeat_helper1: SHIFT(34); - case ts_aux_right_bracket: - REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); default: PARSE_PANIC(); } case 34: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_bracket: + case ts_symbol_right_bracket: SHIFT(35); default: PARSE_PANIC(); @@ -603,9 +603,9 @@ static TSParseResult ts_parse(const char *input) { case 35: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); default: PARSE_PANIC(); @@ -613,9 +613,9 @@ static TSParseResult ts_parse(const char *input) { case 36: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_array, 2, COLLAPSE({1, 1})); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: REDUCE(ts_symbol_array, 2, COLLAPSE({1, 1})); default: PARSE_PANIC(); @@ -623,7 +623,7 @@ static TSParseResult ts_parse(const char *input) { case 37: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_bracket: + case ts_symbol_right_bracket: SHIFT(38); default: PARSE_PANIC(); @@ -631,9 +631,9 @@ static TSParseResult ts_parse(const char *input) { case 38: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); - case ts_aux_right_brace: + case ts_symbol_right_brace: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); default: PARSE_PANIC(); @@ -641,9 +641,9 @@ static TSParseResult ts_parse(const char *input) { case 39: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_array, 2, COLLAPSE({1, 1})); - case ts_aux_right_brace: + case ts_symbol_right_brace: REDUCE(ts_symbol_array, 2, COLLAPSE({1, 1})); default: PARSE_PANIC(); @@ -651,9 +651,9 @@ static TSParseResult ts_parse(const char *input) { case 40: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: REDUCE(ts_symbol_object, 2, COLLAPSE({1, 1})); - case ts_aux_right_brace: + case ts_symbol_right_brace: REDUCE(ts_symbol_object, 2, COLLAPSE({1, 1})); default: PARSE_PANIC(); @@ -661,7 +661,7 @@ static TSParseResult ts_parse(const char *input) { case 41: SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_brace: + case ts_symbol_right_brace: SHIFT(42); default: PARSE_PANIC(); @@ -695,11 +695,11 @@ static TSParseResult ts_parse(const char *input) { SHIFT(20); case ts_symbol_value: SHIFT(45); - case ts_aux_left_brace: + case ts_symbol_left_brace: SHIFT(25); - case ts_aux_left_bracket: + case ts_symbol_left_bracket: SHIFT(32); - case ts_aux_right_bracket: + case ts_symbol_right_bracket: SHIFT(48); default: PARSE_PANIC(); @@ -707,19 +707,19 @@ static TSParseResult ts_parse(const char *input) { case 45: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_comma: + case ts_symbol_comma: SHIFT(22); + case ts_symbol_right_bracket: + REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); case ts_aux_repeat_helper1: SHIFT(46); - case ts_aux_right_bracket: - REDUCE(ts_aux_repeat_helper1, 0, COLLAPSE({})); default: PARSE_PANIC(); } case 46: SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { - case ts_aux_right_bracket: + case ts_symbol_right_bracket: SHIFT(47); default: PARSE_PANIC(); diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 6dde9200..514f57a2 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -47,7 +47,7 @@ namespace tree_sitter { rule_ptr seq(const std::vector &rules); rule_ptr str(const std::string &value); rule_ptr sym(const std::string &name); - rule_ptr aux_sym(const std::string &name); + rule_ptr _sym(const std::string &name); } } diff --git a/spec/compiler/prepare_grammar_spec.cpp b/spec/compiler/prepare_grammar_spec.cpp index a501c3cd..63b06ba1 100644 --- a/spec/compiler/prepare_grammar_spec.cpp +++ b/spec/compiler/prepare_grammar_spec.cpp @@ -1,5 +1,6 @@ #include "spec_helper.h" #include "prepare_grammar/perform.h" +#include "rules/symbol.h" START_TEST @@ -23,11 +24,11 @@ describe("preparing a grammar", []() { AssertThat(result.first, Equals(Grammar("rule1", { { "rule1", seq({ - aux_sym("token1"), + make_shared("token1", SymbolTypeAuxiliary), seq({ sym("rule2"), sym("rule3") }), - aux_sym("token1") }) } + make_shared("token1", SymbolTypeAuxiliary) }) } }))); AssertThat(result.second, Equals(Grammar("", map(), { @@ -101,14 +102,14 @@ describe("preparing a grammar", []() { AssertThat(result, Equals(Grammar("rule1", { { "rule1", seq({ sym("x"), - aux_sym("repeat_helper1"), + make_shared("repeat_helper1", SymbolTypeAuxiliary), sym("y") }) }, }, { { "repeat_helper1", choice({ seq({ seq({ sym("a"), sym("b") }), - aux_sym("repeat_helper1"), + make_shared("repeat_helper1", SymbolTypeAuxiliary), }), blank(), }) } @@ -127,7 +128,7 @@ describe("preparing a grammar", []() { AssertThat(result.first, Equals(Grammar("rule1", { { "rule1", seq({ sym("x"), - aux_sym("token1"), + make_shared("token1", SymbolTypeAuxiliary), sym("y") }) }, }))); diff --git a/src/compiler/build_tables/item_set_transitions.cpp b/src/compiler/build_tables/item_set_transitions.cpp index 9dcfb12f..ea09ab09 100644 --- a/src/compiler/build_tables/item_set_transitions.cpp +++ b/src/compiler/build_tables/item_set_transitions.cpp @@ -24,7 +24,7 @@ namespace tree_sitter { for (auto transition : sym_transitions(item.rule)) { Symbol rule = transition.first; auto consumed_symbols = item.consumed_symbols; - consumed_symbols.push_back(rule.is_auxiliary); + consumed_symbols.push_back(rule.is_hidden()); ParseItem new_item(item.lhs, transition.second, consumed_symbols, item.lookahead_sym); result.insert({ rule, item_set_closure(ParseItemSet({ new_item }), grammar) }); } diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index c09d9121..fbe354dc 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -14,8 +14,8 @@ namespace tree_sitter { namespace build_tables { static int NOT_FOUND = -1; - static Symbol START("start", true); - static Symbol END_OF_INPUT("end", true); + static Symbol START("start", rules::SymbolTypeAuxiliary); + static Symbol END_OF_INPUT("end", rules::SymbolTypeAuxiliary); class TableBuilder { const Grammar grammar; diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index ce4ddf26..9cb0d67b 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -85,7 +85,7 @@ namespace tree_sitter { {} string symbol_id(rules::Symbol symbol) { - if (symbol.is_auxiliary) + if (symbol.is_auxiliary()) return "ts_aux_" + symbol.name; else return "ts_symbol_" + symbol.name; diff --git a/src/compiler/grammar.cpp b/src/compiler/grammar.cpp index d4bb1d1d..b8d6e71b 100644 --- a/src/compiler/grammar.cpp +++ b/src/compiler/grammar.cpp @@ -24,7 +24,7 @@ namespace tree_sitter { aux_rules(aux_rules) {} const rule_ptr Grammar::rule(const Symbol &symbol) const { - auto map = symbol.is_auxiliary ? aux_rules : rules; + auto map = symbol.is_auxiliary() ? aux_rules : rules; auto iter = map.find(symbol.name); if (iter != map.end()) return iter->second; diff --git a/src/compiler/prepare_grammar/expand_repeats.cpp b/src/compiler/prepare_grammar/expand_repeats.cpp index 4b7f52a0..a680f8aa 100644 --- a/src/compiler/prepare_grammar/expand_repeats.cpp +++ b/src/compiler/prepare_grammar/expand_repeats.cpp @@ -27,7 +27,7 @@ namespace tree_sitter { rule_ptr make_repeat_helper(string name, const rule_ptr &rule) { return Choice::Build({ - Seq::Build({ rule, make_shared(name, true) }), + Seq::Build({ rule, make_shared(name, SymbolTypeAuxiliary) }), make_shared() }); } @@ -35,7 +35,7 @@ namespace tree_sitter { rule_ptr inner_rule = apply(rule->content); string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1); aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) }); - value = make_shared(helper_rule_name, true); + value = make_shared(helper_rule_name, SymbolTypeAuxiliary); } void visit(const Seq *rule) { diff --git a/src/compiler/prepare_grammar/extract_tokens.cpp b/src/compiler/prepare_grammar/extract_tokens.cpp index 951f90f2..7687f58d 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cpp +++ b/src/compiler/prepare_grammar/extract_tokens.cpp @@ -37,7 +37,7 @@ namespace tree_sitter { return value; } else { string token_name = add_token(rule); - return make_shared(token_name, true); + return make_shared(token_name, SymbolTypeAuxiliary); } } diff --git a/src/compiler/rules/rules.cpp b/src/compiler/rules/rules.cpp index eeb3f12c..db7352b6 100644 --- a/src/compiler/rules/rules.cpp +++ b/src/compiler/rules/rules.cpp @@ -52,11 +52,11 @@ namespace tree_sitter { } rule_ptr sym(const string &name) { - return make_shared(name, false); + return make_shared(name); } - rule_ptr aux_sym(const string &name) { - return make_shared(name, true); + rule_ptr _sym(const string &name) { + return make_shared(name, SymbolTypeHidden); } } } diff --git a/src/compiler/rules/symbol.cpp b/src/compiler/rules/symbol.cpp index eac7f799..ec8e05f7 100644 --- a/src/compiler/rules/symbol.cpp +++ b/src/compiler/rules/symbol.cpp @@ -1,13 +1,14 @@ #include "symbol.h" #include "visitor.h" +#include using std::string; using std::hash; namespace tree_sitter { namespace rules { - Symbol::Symbol(const std::string &name) : name(name), is_auxiliary(false) {}; - Symbol::Symbol(const std::string &name, bool is_auxiliary) : name(name), is_auxiliary(is_auxiliary) {}; + Symbol::Symbol(const std::string &name) : name(name), type(SymbolTypeNormal) {}; + Symbol::Symbol(const std::string &name, SymbolType type) : name(name), type(type) {}; bool Symbol::operator==(const Rule &rule) const { const Symbol *other = dynamic_cast(&rule); @@ -15,11 +16,11 @@ namespace tree_sitter { } bool Symbol::operator==(const Symbol &other) const { - return (other.name == name) && (other.is_auxiliary == is_auxiliary); + return (other.name == name) && (other.type == type); } size_t Symbol::hash_code() const { - return hash()(name) ^ hash()(is_auxiliary); + return hash()(name) ^ hash()(type); } rule_ptr Symbol::copy() const { @@ -27,17 +28,30 @@ namespace tree_sitter { } string Symbol::to_string() const { - return is_auxiliary ? - string("#" : - string("#"; + switch (type) { + case SymbolTypeNormal: + return string("#"; + case SymbolTypeHidden: + return string("#"; + case SymbolTypeAuxiliary: + return string("#"; + } } bool Symbol::operator<(const Symbol &other) const { - if (is_auxiliary < other.is_auxiliary) return true; - if (is_auxiliary > other.is_auxiliary) return false; + if (type < other.type) return true; + if (type > other.type) return false; return (name < other.name); } + bool Symbol::is_auxiliary() const { + return type == SymbolTypeAuxiliary; + } + + bool Symbol::is_hidden() const { + return (type == SymbolTypeHidden || type == SymbolTypeAuxiliary); + } + void Symbol::accept(Visitor &visitor) const { visitor.visit(this); } diff --git a/src/compiler/rules/symbol.h b/src/compiler/rules/symbol.h index 809567fd..7242cfca 100644 --- a/src/compiler/rules/symbol.h +++ b/src/compiler/rules/symbol.h @@ -6,10 +6,16 @@ namespace tree_sitter { namespace rules { + typedef enum { + SymbolTypeNormal, + SymbolTypeHidden, + SymbolTypeAuxiliary + } SymbolType; + class Symbol : public Rule { public: Symbol(const std::string &name); - Symbol(const std::string &name, bool is_auxiliary); + Symbol(const std::string &name, SymbolType type); bool operator==(const Rule& other) const; bool operator==(const Symbol &other) const; @@ -20,8 +26,11 @@ namespace tree_sitter { void accept(Visitor &visitor) const; bool operator<(const Symbol &other) const; + bool is_hidden() const; + bool is_auxiliary() const; + std::string name; - bool is_auxiliary; + SymbolType type; }; } }