diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index 2a694a8e..4d0c0278 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -5,11 +5,11 @@ using build_tables::perform; using namespace rules; -typedef unordered_set parse_actions; -typedef unordered_set lex_actions; +typedef set parse_actions; +typedef set lex_actions; -static unordered_set keys(const unordered_map &map) { - unordered_set result; +static set keys(const map &map) { + set result; for (auto pair : map) { result.insert(pair.first); } @@ -63,7 +63,7 @@ describe("building parse and lex tables", []() { }; it("has the right starting state", [&]() { - AssertThat(keys(parse_state(0).actions), Equals(unordered_set({ + AssertThat(keys(parse_state(0).actions), Equals(set({ Symbol("expression"), Symbol("term"), Symbol("number"), @@ -71,7 +71,7 @@ describe("building parse and lex tables", []() { Symbol("left-paren"), }))); - AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set({ + AssertThat(lex_state(0).expected_inputs(), Equals(set({ CharacterSet({ '(' }, true), CharacterSet({ {'0', '9'} }, true), CharacterSet({ {'a', 'z'}, {'A', 'Z'} }, true), @@ -79,13 +79,13 @@ describe("building parse and lex tables", []() { }); it("accepts when the start symbol is reduced", [&]() { - AssertThat(parse_state(1).actions, Equals(unordered_map({ + AssertThat(parse_state(1).actions, Equals(map({ { Symbol("__END__"), parse_actions({ ParseAction::Accept() }) } }))); }); it("has the right next states", [&]() { - AssertThat(parse_state(2).actions, Equals(unordered_map({ + AssertThat(parse_state(2).actions, Equals(map({ { Symbol("plus"), parse_actions({ ParseAction::Shift(3) }) }, { Symbol("__END__"), parse_actions({ ParseAction::Reduce(Symbol("expression"), { false }) }) }, }))); diff --git a/spec/fixtures/parsers/arithmetic.c b/spec/fixtures/parsers/arithmetic.c index 40c991d2..68da2420 100644 --- a/spec/fixtures/parsers/arithmetic.c +++ b/spec/fixtures/parsers/arithmetic.c @@ -2,29 +2,29 @@ #include enum ts_symbol { - ts_symbol_plus, - ts_symbol_factor, - ts_symbol_variable, - ts_symbol_term, - ts_symbol_expression, - ts_aux_token1, - ts_symbol_number, - ts_symbol_times, - ts_aux_token2, ts_symbol___END__, + ts_symbol_expression, + ts_symbol_factor, + ts_symbol_number, + ts_symbol_plus, + ts_symbol_term, + ts_symbol_times, + ts_symbol_variable, + ts_aux_token1, + ts_aux_token2, }; static const char *ts_symbol_names[] = { - "plus", - "factor", - "variable", - "term", - "expression", - "token1", - "number", - "times", - "token2", "__END__", + "expression", + "factor", + "number", + "plus", + "term", + "times", + "variable", + "token1", + "token2", }; static void ts_lex(TSParser *parser) { @@ -37,10 +37,10 @@ static void ts_lex(TSParser *parser) { case 1: ACCEPT_TOKEN(ts_symbol___END__); case 2: - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(3); if (LOOKAHEAD_CHAR() == '\0') ADVANCE(1); + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(3); LEX_ERROR(2, EXPECT({"", "*"})); case 3: ACCEPT_TOKEN(ts_symbol_times); @@ -73,38 +73,38 @@ static void ts_lex(TSParser *parser) { ADVANCE(8); LEX_ERROR(2, EXPECT({")", "+"})); case 10: - if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || - ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(13); if (LOOKAHEAD_CHAR() == '(') - ADVANCE(12); + ADVANCE(13); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') ADVANCE(11); + if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || + ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) + ADVANCE(12); LEX_ERROR(4, EXPECT({"(", "0-9", "A-Z", "a-z"})); case 11: if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') ADVANCE(11); ACCEPT_TOKEN(ts_symbol_number); case 12: - ACCEPT_TOKEN(ts_aux_token1); - case 13: if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(13); + ADVANCE(12); ACCEPT_TOKEN(ts_symbol_variable); + case 13: + ACCEPT_TOKEN(ts_aux_token1); case 14: - if (LOOKAHEAD_CHAR() == '+') - ADVANCE(8); if (LOOKAHEAD_CHAR() == '\0') ADVANCE(1); + if (LOOKAHEAD_CHAR() == '+') + ADVANCE(8); LEX_ERROR(2, EXPECT({"", "+"})); case 15: + if (LOOKAHEAD_CHAR() == '\0') + ADVANCE(1); if (LOOKAHEAD_CHAR() == '*') ADVANCE(3); if (LOOKAHEAD_CHAR() == '+') ADVANCE(8); - if (LOOKAHEAD_CHAR() == '\0') - ADVANCE(1); LEX_ERROR(2, EXPECT({"", "*-+"})); default: LEX_PANIC(); @@ -118,18 +118,18 @@ static TSParseResult ts_parse(const char *input) { case 0: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(42); - case ts_symbol_number: - SHIFT(41); + case ts_symbol_expression: + SHIFT(1); case ts_symbol_factor: SHIFT(45); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(41); case ts_symbol_term: SHIFT(2); - case ts_symbol_expression: - SHIFT(1); + case ts_symbol_variable: + SHIFT(41); + case ts_aux_token1: + SHIFT(42); default: PARSE_PANIC(); } @@ -154,16 +154,16 @@ static TSParseResult ts_parse(const char *input) { case 3: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(6); case ts_symbol_factor: SHIFT(34); case ts_symbol_number: SHIFT(5); - case ts_symbol_variable: - SHIFT(5); case ts_symbol_term: SHIFT(4); + case ts_symbol_variable: + SHIFT(5); + case ts_aux_token1: + SHIFT(6); default: PARSE_PANIC(); } @@ -178,54 +178,54 @@ static TSParseResult ts_parse(const char *input) { case 5: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_times: - REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol___END__: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); + case ts_symbol_times: + REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 6: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(32); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } case 7: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_expression, 1, COLLAPSE({0})); case ts_symbol_plus: SHIFT(8); + case ts_aux_token2: + REDUCE(ts_symbol_expression, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 8: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(11); case ts_symbol_factor: SHIFT(25); case ts_symbol_number: SHIFT(10); - case ts_symbol_variable: - SHIFT(10); case ts_symbol_term: SHIFT(9); + case ts_symbol_variable: + SHIFT(10); + case ts_aux_token1: + SHIFT(11); default: PARSE_PANIC(); } @@ -240,39 +240,39 @@ static TSParseResult ts_parse(const char *input) { case 10: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol_times: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); + case ts_aux_token2: + REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 11: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(23); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } case 12: SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: + case ts_symbol_plus: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol_times: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); - case ts_symbol_plus: + case ts_aux_token2: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); default: PARSE_PANIC(); @@ -280,18 +280,18 @@ static TSParseResult ts_parse(const char *input) { case 13: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(14); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } @@ -306,11 +306,11 @@ static TSParseResult ts_parse(const char *input) { case 15: SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: + case ts_symbol_plus: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol_times: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); - case ts_symbol_plus: + case ts_aux_token2: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); @@ -318,54 +318,54 @@ static TSParseResult ts_parse(const char *input) { case 16: SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_term, 1, COLLAPSE({0})); case ts_symbol_plus: REDUCE(ts_symbol_term, 1, COLLAPSE({0})); case ts_symbol_times: SHIFT(17); + case ts_aux_token2: + REDUCE(ts_symbol_term, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 17: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(19); case ts_symbol_factor: SHIFT(22); case ts_symbol_number: SHIFT(18); case ts_symbol_variable: SHIFT(18); + case ts_aux_token1: + SHIFT(19); default: PARSE_PANIC(); } case 18: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol_plus: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); + case ts_aux_token2: + REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 19: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(20); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } @@ -380,20 +380,20 @@ static TSParseResult ts_parse(const char *input) { case 21: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol_plus: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); + case ts_aux_token2: + REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 22: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_term, 3, COLLAPSE({0, 0, 0})); case ts_symbol_plus: REDUCE(ts_symbol_term, 3, COLLAPSE({0, 0, 0})); + case ts_aux_token2: + REDUCE(ts_symbol_term, 3, COLLAPSE({0, 0, 0})); default: PARSE_PANIC(); } @@ -408,34 +408,34 @@ static TSParseResult ts_parse(const char *input) { case 24: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol_times: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); + case ts_aux_token2: + REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 25: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_token2: - REDUCE(ts_symbol_term, 1, COLLAPSE({0})); case ts_symbol_times: SHIFT(26); + case ts_aux_token2: + REDUCE(ts_symbol_term, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 26: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(28); case ts_symbol_factor: SHIFT(31); case ts_symbol_number: SHIFT(27); case ts_symbol_variable: SHIFT(27); + case ts_aux_token1: + SHIFT(28); default: PARSE_PANIC(); } @@ -450,18 +450,18 @@ static TSParseResult ts_parse(const char *input) { case 28: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(29); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } @@ -500,10 +500,10 @@ static TSParseResult ts_parse(const char *input) { case 33: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_symbol_times: - REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol___END__: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); + case ts_symbol_times: + REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } @@ -520,14 +520,14 @@ static TSParseResult ts_parse(const char *input) { case 35: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(37); case ts_symbol_factor: SHIFT(40); case ts_symbol_number: SHIFT(36); case ts_symbol_variable: SHIFT(36); + case ts_aux_token1: + SHIFT(37); default: PARSE_PANIC(); } @@ -542,18 +542,18 @@ static TSParseResult ts_parse(const char *input) { case 37: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(38); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } @@ -584,30 +584,30 @@ static TSParseResult ts_parse(const char *input) { case 41: SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { + case ts_symbol___END__: + REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol_plus: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol_times: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); - case ts_symbol___END__: - REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 42: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(43); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } @@ -622,22 +622,22 @@ static TSParseResult ts_parse(const char *input) { case 44: SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { + case ts_symbol___END__: + REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol_plus: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol_times: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); - case ts_symbol___END__: - REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 45: SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { - case ts_symbol_plus: - REDUCE(ts_symbol_term, 1, COLLAPSE({0})); case ts_symbol___END__: REDUCE(ts_symbol_term, 1, COLLAPSE({0})); + case ts_symbol_plus: + REDUCE(ts_symbol_term, 1, COLLAPSE({0})); case ts_symbol_times: SHIFT(46); default: @@ -646,42 +646,42 @@ static TSParseResult ts_parse(const char *input) { case 46: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(48); case ts_symbol_factor: SHIFT(51); case ts_symbol_number: SHIFT(47); case ts_symbol_variable: SHIFT(47); + case ts_aux_token1: + SHIFT(48); default: PARSE_PANIC(); } case 47: SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { - case ts_symbol_plus: - REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); case ts_symbol___END__: REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); + case ts_symbol_plus: + REDUCE(ts_symbol_factor, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 48: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_aux_token1: - SHIFT(13); - case ts_symbol_number: - SHIFT(12); case ts_symbol_expression: SHIFT(49); case ts_symbol_factor: SHIFT(16); - case ts_symbol_variable: + case ts_symbol_number: SHIFT(12); case ts_symbol_term: SHIFT(7); + case ts_symbol_variable: + SHIFT(12); + case ts_aux_token1: + SHIFT(13); default: PARSE_PANIC(); } @@ -696,20 +696,20 @@ static TSParseResult ts_parse(const char *input) { case 50: SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { - case ts_symbol_plus: - REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); case ts_symbol___END__: REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); + case ts_symbol_plus: + REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1})); default: PARSE_PANIC(); } case 51: SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { - case ts_symbol_plus: - REDUCE(ts_symbol_term, 3, COLLAPSE({0, 0, 0})); case ts_symbol___END__: REDUCE(ts_symbol_term, 3, COLLAPSE({0, 0, 0})); + case ts_symbol_plus: + REDUCE(ts_symbol_term, 3, COLLAPSE({0, 0, 0})); default: PARSE_PANIC(); } diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index 76e35a26..d0fd262d 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -2,39 +2,39 @@ #include enum ts_symbol { - ts_aux_token6, - ts_symbol_number, - ts_symbol_string, - ts_aux_token3, - ts_aux_token5, - ts_symbol_array, - ts_aux_repeat_helper1, - ts_aux_token7, - ts_aux_token4, ts_symbol___END__, - ts_aux_token2, + ts_symbol_array, + ts_symbol_number, + ts_symbol_object, + ts_symbol_string, + ts_symbol_value, + ts_aux_repeat_helper1, ts_aux_repeat_helper2, ts_aux_token1, - ts_symbol_object, - ts_symbol_value, + ts_aux_token2, + ts_aux_token3, + ts_aux_token4, + ts_aux_token5, + ts_aux_token6, + ts_aux_token7, }; static const char *ts_symbol_names[] = { - "token6", - "number", - "string", - "token3", - "token5", - "array", - "repeat_helper1", - "token7", - "token4", "__END__", - "token2", + "array", + "number", + "object", + "string", + "value", + "repeat_helper1", "repeat_helper2", "token1", - "object", - "value", + "token2", + "token3", + "token4", + "token5", + "token6", + "token7", }; static void ts_lex(TSParser *parser) { @@ -59,10 +59,10 @@ static void ts_lex(TSParser *parser) { case 5: ACCEPT_TOKEN(ts_aux_token4); case 6: - if (LOOKAHEAD_CHAR() == ']') - ADVANCE(5); if (LOOKAHEAD_CHAR() == ',') ADVANCE(3); + if (LOOKAHEAD_CHAR() == ']') + ADVANCE(5); LEX_ERROR(2, EXPECT({",", "]"})); case 7: if (LOOKAHEAD_CHAR() == '}') @@ -71,72 +71,72 @@ static void ts_lex(TSParser *parser) { case 8: ACCEPT_TOKEN(ts_aux_token7); case 9: - if (LOOKAHEAD_CHAR() == '}') - ADVANCE(8); if (LOOKAHEAD_CHAR() == ',') ADVANCE(3); + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(8); LEX_ERROR(2, EXPECT({",", "}"})); case 10: - if (LOOKAHEAD_CHAR() == '[') - ADVANCE(18); - if (LOOKAHEAD_CHAR() == '{') - ADVANCE(19); if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') ADVANCE(11); + if (LOOKAHEAD_CHAR() == '[') + ADVANCE(18); + if (LOOKAHEAD_CHAR() == '{') + ADVANCE(19); LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"})); case 11: if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') ADVANCE(11); ACCEPT_TOKEN(ts_symbol_number); case 12: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(13); if (LOOKAHEAD_CHAR() == '\\') ADVANCE(14); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') ADVANCE(15); + LEX_ERROR(2, EXPECT({"-!", "#-"})); + case 13: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) ADVANCE(13); - LEX_ERROR(2, EXPECT({"-!", "#-"})); - case 13: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(17); if (LOOKAHEAD_CHAR() == '\\') ADVANCE(14); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') ADVANCE(15); + LEX_ERROR(1, EXPECT({""})); + case 14: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) ADVANCE(13); - LEX_ERROR(1, EXPECT({""})); - case 14: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(16); + if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') + ADVANCE(13); if (LOOKAHEAD_CHAR() == '\\') ADVANCE(14); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') ADVANCE(15); - if (!((LOOKAHEAD_CHAR() == '\"') || - (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(13); - if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') - ADVANCE(13); - LEX_ERROR(2, EXPECT({"", "#-\""})); + LEX_ERROR(1, EXPECT({""})); case 15: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(13); LEX_ERROR(1, EXPECT({"\""})); case 16: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(13); if (LOOKAHEAD_CHAR() == '\"') ADVANCE(17); if (LOOKAHEAD_CHAR() == '\\') ADVANCE(14); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') ADVANCE(15); - if (!((LOOKAHEAD_CHAR() == '\"') || - (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(13); ACCEPT_TOKEN(ts_symbol_string); case 17: ACCEPT_TOKEN(ts_symbol_string); @@ -166,20 +166,20 @@ static TSParseResult ts_parse(const char *input) { case 0: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(53); case ts_symbol_array: SHIFT(53); + case ts_symbol_number: + SHIFT(53); + case ts_symbol_object: + SHIFT(53); + case ts_symbol_string: + SHIFT(53); case ts_symbol_value: SHIFT(1); - case ts_symbol_object: - SHIFT(53); - case ts_aux_token5: - SHIFT(47); - case ts_symbol_number: - SHIFT(53); case ts_aux_token1: SHIFT(2); + case ts_aux_token5: + SHIFT(47); default: PARSE_PANIC(); } @@ -194,52 +194,52 @@ static TSParseResult ts_parse(const char *input) { case 2: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(44); - case ts_symbol_object: - SHIFT(25); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 3: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(4); - case ts_symbol_object: - SHIFT(25); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 4: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper2: + SHIFT(5); case ts_aux_token2: SHIFT(7); case ts_aux_token3: SHIFT(5); - case ts_aux_repeat_helper2: - SHIFT(5); default: PARSE_PANIC(); } @@ -254,62 +254,62 @@ static TSParseResult ts_parse(const char *input) { case 6: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_token3: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); case ts_aux_token2: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + case ts_aux_token3: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); default: PARSE_PANIC(); } case 7: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(43); case ts_symbol_array: - SHIFT(43); - case ts_symbol_object: - SHIFT(43); - case ts_symbol_value: SHIFT(41); - case ts_aux_token5: - SHIFT(35); case ts_symbol_number: - SHIFT(43); + SHIFT(41); + case ts_symbol_object: + SHIFT(41); + case ts_symbol_string: + SHIFT(41); + case ts_symbol_value: + SHIFT(42); case ts_aux_token1: SHIFT(8); + case ts_aux_token5: + SHIFT(35); default: PARSE_PANIC(); } case 8: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(9); - case ts_symbol_object: - SHIFT(25); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 9: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper2: + SHIFT(10); case ts_aux_token2: SHIFT(7); case ts_aux_token3: SHIFT(10); - case ts_aux_repeat_helper2: - SHIFT(10); default: PARSE_PANIC(); } @@ -324,10 +324,10 @@ static TSParseResult ts_parse(const char *input) { case 11: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); case ts_aux_token2: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + case ts_aux_token4: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); default: PARSE_PANIC(); } @@ -350,32 +350,32 @@ static TSParseResult ts_parse(const char *input) { case 14: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); case ts_symbol_object: SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(15); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 15: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(16); case ts_aux_token2: SHIFT(18); case ts_aux_token3: SHIFT(16); - case ts_aux_repeat_helper1: - SHIFT(16); default: PARSE_PANIC(); } @@ -390,10 +390,10 @@ static TSParseResult ts_parse(const char *input) { case 17: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_token3: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); case ts_aux_token2: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_aux_token3: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); } @@ -416,52 +416,52 @@ static TSParseResult ts_parse(const char *input) { case 20: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(34); case ts_symbol_array: - SHIFT(34); - case ts_symbol_object: - SHIFT(34); - case ts_symbol_value: SHIFT(32); - case ts_aux_token5: - SHIFT(26); case ts_symbol_number: - SHIFT(34); + SHIFT(32); + case ts_symbol_object: + SHIFT(32); + case ts_symbol_string: + SHIFT(32); + case ts_symbol_value: + SHIFT(33); case ts_aux_token1: SHIFT(21); + case ts_aux_token5: + SHIFT(26); default: PARSE_PANIC(); } case 21: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); + case ts_symbol_object: + SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(22); - case ts_symbol_object: - SHIFT(25); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 22: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper2: + SHIFT(23); case ts_aux_token2: SHIFT(7); case ts_aux_token3: SHIFT(23); - case ts_aux_repeat_helper2: - SHIFT(23); default: PARSE_PANIC(); } @@ -476,20 +476,20 @@ static TSParseResult ts_parse(const char *input) { case 24: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: - REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); case ts_aux_token2: REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); + case ts_aux_token7: + REDUCE(ts_symbol_array, 4, COLLAPSE({1, 0, 1, 1})); default: PARSE_PANIC(); } case 25: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { - case ts_aux_token3: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); case ts_aux_token2: REDUCE(ts_symbol_value, 1, COLLAPSE({0})); + case ts_aux_token3: + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); default: PARSE_PANIC(); } @@ -512,32 +512,32 @@ static TSParseResult ts_parse(const char *input) { case 28: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); case ts_symbol_object: SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(29); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 29: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(30); case ts_aux_token2: SHIFT(18); case ts_aux_token3: SHIFT(30); - case ts_aux_repeat_helper1: - SHIFT(30); default: PARSE_PANIC(); } @@ -552,40 +552,40 @@ static TSParseResult ts_parse(const char *input) { case 31: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); case ts_aux_token2: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_aux_token7: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); } case 32: SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { - case ts_aux_token7: - REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0})); case ts_aux_token2: - SHIFT(18); - case ts_aux_repeat_helper1: - SHIFT(33); + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); + case ts_aux_token7: + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 33: - SET_LEX_STATE(7); + SET_LEX_STATE(9); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(34); + case ts_aux_token2: + SHIFT(18); case ts_aux_token7: - REDUCE(ts_aux_repeat_helper1, 5, COLLAPSE({1, 0, 1, 0, 1})); + REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0})); default: PARSE_PANIC(); } case 34: - SET_LEX_STATE(9); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_aux_token7: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - case ts_aux_token2: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); + REDUCE(ts_aux_repeat_helper1, 5, COLLAPSE({1, 0, 1, 0, 1})); default: PARSE_PANIC(); } @@ -608,32 +608,32 @@ static TSParseResult ts_parse(const char *input) { case 37: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); case ts_symbol_object: SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(38); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 38: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(39); case ts_aux_token2: SHIFT(18); case ts_aux_token3: SHIFT(39); - case ts_aux_repeat_helper1: - SHIFT(39); default: PARSE_PANIC(); } @@ -648,10 +648,10 @@ static TSParseResult ts_parse(const char *input) { case 40: SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: - REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); case ts_aux_token2: REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); + case ts_aux_token4: + REDUCE(ts_symbol_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: PARSE_PANIC(); } @@ -659,15 +659,25 @@ static TSParseResult ts_parse(const char *input) { SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_aux_token2: - SHIFT(7); + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); case ts_aux_token4: - REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0})); - case ts_aux_repeat_helper2: - SHIFT(42); + REDUCE(ts_symbol_value, 1, COLLAPSE({0})); default: PARSE_PANIC(); } case 42: + SET_LEX_STATE(6); + switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper2: + SHIFT(43); + case ts_aux_token2: + SHIFT(7); + case ts_aux_token4: + REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0})); + default: + PARSE_PANIC(); + } + case 43: SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_aux_token4: @@ -675,25 +685,15 @@ static TSParseResult ts_parse(const char *input) { default: PARSE_PANIC(); } - case 43: - SET_LEX_STATE(6); - switch (LOOKAHEAD_SYM()) { - case ts_aux_token4: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - case ts_aux_token2: - REDUCE(ts_symbol_value, 1, COLLAPSE({0})); - default: - PARSE_PANIC(); - } case 44: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper2: + SHIFT(45); case ts_aux_token2: SHIFT(7); case ts_aux_token3: SHIFT(45); - case ts_aux_repeat_helper2: - SHIFT(45); default: PARSE_PANIC(); } @@ -732,32 +732,32 @@ static TSParseResult ts_parse(const char *input) { case 49: SET_LEX_STATE(10); switch (LOOKAHEAD_SYM()) { - case ts_symbol_string: - SHIFT(25); case ts_symbol_array: SHIFT(25); + case ts_symbol_number: + SHIFT(25); case ts_symbol_object: SHIFT(25); + case ts_symbol_string: + SHIFT(25); case ts_symbol_value: SHIFT(50); - case ts_aux_token5: - SHIFT(12); - case ts_symbol_number: - SHIFT(25); case ts_aux_token1: SHIFT(3); + case ts_aux_token5: + SHIFT(12); default: PARSE_PANIC(); } case 50: SET_LEX_STATE(2); switch (LOOKAHEAD_SYM()) { + case ts_aux_repeat_helper1: + SHIFT(51); case ts_aux_token2: SHIFT(18); case ts_aux_token3: SHIFT(51); - case ts_aux_repeat_helper1: - SHIFT(51); default: PARSE_PANIC(); } diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index bcb3bdd3..ad6da173 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -1,12 +1,11 @@ #include "c_code.h" #include -#include -#include +#include +#include using std::string; using std::to_string; -using std::unordered_map; -using std::unordered_set; +using std::map; using std::vector; using std::set; using std::pair; @@ -146,7 +145,7 @@ namespace tree_sitter { return result; } - string code_for_parse_actions(const unordered_set &actions, const unordered_set &expected_inputs) { + string code_for_parse_actions(const set &actions, const set &expected_inputs) { auto action = actions.begin(); switch (action->type) { case ParseActionTypeAccept: @@ -165,7 +164,7 @@ namespace tree_sitter { return input; } - string lex_error_call(const unordered_set &expected_inputs) { + string lex_error_call(const set &expected_inputs) { rules::CharacterSet expected_set; for (auto &rule : expected_inputs) expected_set.add_set(rule); @@ -181,7 +180,7 @@ namespace tree_sitter { return result; } - string code_for_lex_actions(const unordered_set &actions, const unordered_set &expected_inputs) { + string code_for_lex_actions(const set &actions, const set &expected_inputs) { auto action = actions.begin(); if (action == actions.end()) { return lex_error_call(expected_inputs); diff --git a/src/compiler/lex_table.cpp b/src/compiler/lex_table.cpp index 8c495997..59333fd7 100644 --- a/src/compiler/lex_table.cpp +++ b/src/compiler/lex_table.cpp @@ -2,8 +2,8 @@ using std::string; using std::to_string; -using std::unordered_map; -using std::unordered_set; +using std::map; +using std::set; using tree_sitter::rules::Symbol; using tree_sitter::rules::CharacterSet; @@ -33,6 +33,14 @@ namespace tree_sitter { (symbol == other.symbol); } + bool LexAction::operator<(const LexAction &other) const { + if (type < other.type) return true; + if (type > other.type) return false; + if (state_index < other.state_index) return true; + if (state_index > other.state_index) return false; + return (symbol < other.symbol); + } + std::ostream& operator<<(std::ostream &stream, const LexAction &action) { switch (action.type) { case LexActionTypeError: @@ -45,8 +53,8 @@ namespace tree_sitter { } // State - unordered_set LexState::expected_inputs() const { - unordered_set result; + set LexState::expected_inputs() const { + set result; for (auto pair : actions) result.insert(pair.first); return result; diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index ae14d38d..3624c718 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -4,7 +4,8 @@ #include #include #include -#include +#include +#include #include "symbol.h" #include "character_set.h" @@ -22,6 +23,7 @@ namespace tree_sitter { static LexAction Error(); static LexAction Advance(size_t state_index); bool operator==(const LexAction &action) const; + bool operator<(const LexAction &action) const; LexActionType type; rules::Symbol symbol; @@ -45,9 +47,9 @@ namespace std { namespace tree_sitter { class LexState { public: - std::unordered_map> actions; - std::unordered_set default_actions; - std::unordered_set expected_inputs() const; + std::map> actions; + std::set default_actions; + std::set expected_inputs() const; }; class LexTable { diff --git a/src/compiler/parse_table.cpp b/src/compiler/parse_table.cpp index 9752734e..bc8c82d7 100644 --- a/src/compiler/parse_table.cpp +++ b/src/compiler/parse_table.cpp @@ -3,7 +3,7 @@ using std::string; using std::ostream; using std::to_string; -using std::unordered_set; +using std::set; using std::vector; using tree_sitter::rules::Symbol; @@ -38,6 +38,14 @@ namespace tree_sitter { return types_eq && state_indices_eq && child_flags_eq; } + bool ParseAction::operator<(const ParseAction &other) const { + if (type < other.type) return true; + if (type > other.type) return false; + if (state_index < other.state_index) return true; + if (state_index > other.state_index) return false; + return (child_flags < other.child_flags); + } + ostream& operator<<(ostream &stream, const ParseAction &action) { switch (action.type) { case ParseActionTypeError: @@ -54,8 +62,8 @@ namespace tree_sitter { // State ParseState::ParseState() : lex_state_index(-1) {} - unordered_set ParseState::expected_inputs() const { - unordered_set result; + set ParseState::expected_inputs() const { + set result; for (auto pair : actions) result.insert(pair.first); return result; diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 8996700f..dcef2f18 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -1,9 +1,9 @@ #ifndef __TreeSitter__parse_table__ #define __TreeSitter__parse_table__ -#include +#include #include -#include +#include #include "symbol.h" namespace tree_sitter { @@ -22,6 +22,7 @@ namespace tree_sitter { static ParseAction Shift(size_t state_index); static ParseAction Reduce(rules::Symbol symbol, const std::vector &child_flags); bool operator==(const ParseAction &action) const; + bool operator<(const ParseAction &action) const; ParseActionType type; std::vector child_flags; @@ -49,8 +50,8 @@ namespace tree_sitter { class ParseState { public: ParseState(); - std::unordered_map> actions; - std::unordered_set expected_inputs() const; + std::map> actions; + std::set expected_inputs() const; size_t lex_state_index; }; @@ -62,7 +63,7 @@ namespace tree_sitter { void add_action(size_t state_index, rules::Symbol symbol, ParseAction action); std::vector states; - std::unordered_set symbols; + std::set symbols; }; } diff --git a/src/compiler/rules/character_set.cpp b/src/compiler/rules/character_set.cpp index 4d6bd7d6..cfd9548d 100644 --- a/src/compiler/rules/character_set.cpp +++ b/src/compiler/rules/character_set.cpp @@ -64,6 +64,10 @@ namespace tree_sitter { return other && (ranges == other->ranges); } + bool CharacterSet::operator<(const CharacterSet &other) const { + return ranges < other.ranges; + } + size_t CharacterSet::hash_code() const { return typeid(this).hash_code() ^ hash()(to_string()); } diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 1870a232..064a39cf 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -44,6 +44,7 @@ namespace tree_sitter { CharacterSet remove_set(const CharacterSet &other); bool operator==(const Rule& other) const; + bool operator<(const CharacterSet &) const; size_t hash_code() const; rule_ptr copy() const; std::string to_string() const; diff --git a/src/compiler/rules/symbol.cpp b/src/compiler/rules/symbol.cpp index de5a73ef..492033c5 100644 --- a/src/compiler/rules/symbol.cpp +++ b/src/compiler/rules/symbol.cpp @@ -33,9 +33,9 @@ namespace tree_sitter { } bool Symbol::operator<(const Symbol &other) const { - if (name < other.name) return true; - if (other.name < name) return false; - return is_auxiliary < other.is_auxiliary; + if (is_auxiliary < other.is_auxiliary) return true; + if (is_auxiliary > other.is_auxiliary) return false; + return (name < other.name); } void Symbol::accept(Visitor &visitor) const { diff --git a/todo.md b/todo.md index c7930981..5ac62183 100644 --- a/todo.md +++ b/todo.md @@ -2,8 +2,6 @@ TODO ==== ## correct batch parsing -- generate correct lexers in the presence of 'overlapping' transition - rules (e.g. transitions on 'a' and on \w). - add comments to generated C code giving an example string for each token - change the meaning of 'repeat' from 1-or-more to 0-or-more - fix any memory leaks