diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index dd37460e..6f169dce 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -30,81 +30,82 @@ LEX_FN() { START_LEXER(); switch (LEX_STATE()) { case 0: - if (LOOKAHEAD_CHAR() == '\0') - ADVANCE(1); - LEX_ERROR(1, EXPECT({""})); + LEX_ERROR(); case 1: - ACCEPT_TOKEN(ts_aux_sym_end); - case 2: - if (LOOKAHEAD_CHAR() == '\0') - ADVANCE(1); if (LOOKAHEAD_CHAR() == '+') - ADVANCE(3); - LEX_ERROR(2, EXPECT({"", "+"})); - case 3: + ADVANCE(2); + LEX_ERROR(); + case 2: ACCEPT_TOKEN(ts_sym_plus); - case 4: + case 3: if (LOOKAHEAD_CHAR() == ')') - ADVANCE(5); - LEX_ERROR(1, EXPECT({")"})); - case 5: + ADVANCE(4); + LEX_ERROR(); + case 4: ACCEPT_TOKEN(ts_aux_sym_token2); + case 5: + if (LOOKAHEAD_CHAR() == ')') + ADVANCE(4); + if (LOOKAHEAD_CHAR() == '+') + ADVANCE(2); + LEX_ERROR(); case 6: if (LOOKAHEAD_CHAR() == ')') - ADVANCE(5); + ADVANCE(4); + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(7); if (LOOKAHEAD_CHAR() == '+') - ADVANCE(3); - LEX_ERROR(2, EXPECT({")", "+"})); + ADVANCE(2); + LEX_ERROR(); case 7: - if (LOOKAHEAD_CHAR() == ')') - ADVANCE(5); - if (LOOKAHEAD_CHAR() == '*') - ADVANCE(8); - if (LOOKAHEAD_CHAR() == '+') - ADVANCE(3); - LEX_ERROR(1, EXPECT({")-+"})); - case 8: ACCEPT_TOKEN(ts_sym_times); - case 9: + case 8: if (LOOKAHEAD_CHAR() == '(') + ADVANCE(9); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') ADVANCE(10); - if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(11); if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(12); - LEX_ERROR(4, EXPECT({"(", "0-9", "A-Z", "a-z"})); - case 10: + ADVANCE(11); + LEX_ERROR(); + case 9: ACCEPT_TOKEN(ts_aux_sym_token1); - case 11: + case 10: if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(11); + ADVANCE(10); ACCEPT_TOKEN(ts_sym_number); - case 12: + case 11: if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) - ADVANCE(12); + ADVANCE(11); ACCEPT_TOKEN(ts_sym_variable); - case 13: + case 12: if (LOOKAHEAD_CHAR() == ')') - ADVANCE(5); + ADVANCE(4); if (LOOKAHEAD_CHAR() == '*') - ADVANCE(8); - LEX_ERROR(1, EXPECT({")-*"})); - case 14: - if (LOOKAHEAD_CHAR() == '\0') - ADVANCE(1); + ADVANCE(7); + LEX_ERROR(); + case 13: if (LOOKAHEAD_CHAR() == '*') - ADVANCE(8); + ADVANCE(7); if (LOOKAHEAD_CHAR() == '+') - ADVANCE(3); - LEX_ERROR(2, EXPECT({"", "*-+"})); - case 15: - if (LOOKAHEAD_CHAR() == '\0') - ADVANCE(1); + ADVANCE(2); + LEX_ERROR(); + case 14: if (LOOKAHEAD_CHAR() == '*') - ADVANCE(8); - LEX_ERROR(2, EXPECT({"", "*"})); + ADVANCE(7); + LEX_ERROR(); + case ts_lex_state_error: + if (LOOKAHEAD_CHAR() == '*') + ADVANCE(7); + if (LOOKAHEAD_CHAR() == '+') + ADVANCE(2); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(10); + if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') || + ('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z')) + ADVANCE(11); + LEX_ERROR(); default: LEX_PANIC(); } @@ -115,7 +116,7 @@ PARSE_FN() { START_PARSER(); switch (PARSE_STATE()) { case 0: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(1); @@ -130,7 +131,7 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(49); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 1: SET_LEX_STATE(0); @@ -138,10 +139,10 @@ PARSE_FN() { case ts_aux_sym_end: ACCEPT_INPUT(); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 2: - SET_LEX_STATE(14); + SET_LEX_STATE(13); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_term, 1, COLLAPSE({0})); @@ -150,10 +151,10 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_term, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_end})); } case 3: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_factor: SHIFT(4); @@ -164,30 +165,30 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(6); default: - PARSE_PANIC(); + PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1})); } case 4: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0})); case ts_aux_sym_end: REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end})); } case 5: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); case ts_aux_sym_end: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end})); } case 6: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(7); @@ -202,28 +203,28 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 7: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(8); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 8: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); case ts_aux_sym_end: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end})); } case 9: - SET_LEX_STATE(7); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_term, 1, COLLAPSE({0})); @@ -232,10 +233,10 @@ PARSE_FN() { case ts_aux_sym_token2: REDUCE(ts_sym_term, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_token2})); } case 10: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_factor: SHIFT(11); @@ -246,30 +247,30 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(13); default: - PARSE_PANIC(); + PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1})); } case 11: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0})); case ts_aux_sym_token2: REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2})); } case 12: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); case ts_aux_sym_token2: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2})); } case 13: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(14); @@ -284,28 +285,28 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 14: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(15); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 15: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); case ts_aux_sym_token2: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2})); } case 16: - SET_LEX_STATE(7); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); @@ -314,20 +315,20 @@ PARSE_FN() { case ts_aux_sym_token2: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_token2})); } case 17: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: SHIFT(18); case ts_aux_sym_token2: REDUCE(ts_sym_expression, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2})); } case 18: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_factor: SHIFT(19); @@ -340,20 +341,20 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(31); default: - PARSE_PANIC(); + PARSE_ERROR(5, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 19: - SET_LEX_STATE(13); + SET_LEX_STATE(12); switch (LOOKAHEAD_SYM()) { case ts_sym_times: SHIFT(20); case ts_aux_sym_token2: REDUCE(ts_sym_term, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_token2})); } case 20: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_factor: SHIFT(21); @@ -364,26 +365,26 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(23); default: - PARSE_PANIC(); + PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1})); } case 21: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 22: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 23: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(24); @@ -398,26 +399,26 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 24: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(25); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 25: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 26: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(27); @@ -432,18 +433,18 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 27: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(28); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 28: - SET_LEX_STATE(7); + SET_LEX_STATE(6); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); @@ -452,28 +453,28 @@ PARSE_FN() { case ts_aux_sym_token2: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_token2})); } case 29: - SET_LEX_STATE(13); + SET_LEX_STATE(12); switch (LOOKAHEAD_SYM()) { case ts_sym_times: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); case ts_aux_sym_token2: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_token2})); } case 30: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: REDUCE(ts_sym_expression, 3, COLLAPSE({0, 0, 0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 31: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(32); @@ -488,28 +489,28 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 32: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(33); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 33: - SET_LEX_STATE(13); + SET_LEX_STATE(12); switch (LOOKAHEAD_SYM()) { case ts_sym_times: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); case ts_aux_sym_token2: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_token2})); } case 34: - SET_LEX_STATE(14); + SET_LEX_STATE(13); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); @@ -518,20 +519,20 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_end})); } case 35: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: SHIFT(36); case ts_aux_sym_end: REDUCE(ts_sym_expression, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end})); } case 36: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_factor: SHIFT(37); @@ -544,20 +545,20 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(46); default: - PARSE_PANIC(); + PARSE_ERROR(5, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 37: - SET_LEX_STATE(15); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_sym_times: SHIFT(38); case ts_aux_sym_end: REDUCE(ts_sym_term, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_end})); } case 38: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_factor: SHIFT(39); @@ -568,7 +569,7 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(41); default: - PARSE_PANIC(); + PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1})); } case 39: SET_LEX_STATE(0); @@ -576,7 +577,7 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 40: SET_LEX_STATE(0); @@ -584,10 +585,10 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 41: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(42); @@ -602,15 +603,15 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 42: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(43); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 43: SET_LEX_STATE(0); @@ -618,17 +619,17 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 44: - SET_LEX_STATE(15); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_sym_times: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); case ts_aux_sym_end: REDUCE(ts_sym_factor, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_end})); } case 45: SET_LEX_STATE(0); @@ -636,10 +637,10 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_expression, 3, COLLAPSE({0, 0, 0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 46: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(47); @@ -654,28 +655,28 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 47: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(48); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 48: - SET_LEX_STATE(15); + SET_LEX_STATE(14); switch (LOOKAHEAD_SYM()) { case ts_sym_times: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); case ts_aux_sym_end: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_end})); } case 49: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_expression: SHIFT(50); @@ -690,18 +691,18 @@ PARSE_FN() { case ts_aux_sym_token1: SHIFT(26); default: - PARSE_PANIC(); + PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); } case 50: - SET_LEX_STATE(4); + SET_LEX_STATE(3); switch (LOOKAHEAD_SYM()) { case ts_aux_sym_token2: SHIFT(51); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_token2})); } case 51: - SET_LEX_STATE(14); + SET_LEX_STATE(13); switch (LOOKAHEAD_SYM()) { case ts_sym_plus: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); @@ -710,7 +711,7 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_end})); } default: PARSE_PANIC(); diff --git a/examples/parsers/json.c b/examples/parsers/json.c index b86f0080..ba7dfe57 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -38,135 +38,161 @@ LEX_FN() { START_LEXER(); switch (LEX_STATE()) { case 0: - if (LOOKAHEAD_CHAR() == '\0') - ADVANCE(1); - LEX_ERROR(1, EXPECT({""})); + LEX_ERROR(); case 1: - ACCEPT_TOKEN(ts_aux_sym_end); + if (LOOKAHEAD_CHAR() == ',') + ADVANCE(2); + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(3); + LEX_ERROR(); case 2: - if (LOOKAHEAD_CHAR() == ',') - ADVANCE(3); - if (LOOKAHEAD_CHAR() == '}') - ADVANCE(4); - LEX_ERROR(2, EXPECT({",", "}"})); - case 3: ACCEPT_TOKEN(ts_sym_comma); - case 4: + case 3: ACCEPT_TOKEN(ts_sym_right_brace); - case 5: + case 4: if (LOOKAHEAD_CHAR() == '}') - ADVANCE(4); - LEX_ERROR(1, EXPECT({"}"})); - case 6: - if (LOOKAHEAD_CHAR() == ',') ADVANCE(3); + LEX_ERROR(); + case 5: + if (LOOKAHEAD_CHAR() == ',') + ADVANCE(2); if (LOOKAHEAD_CHAR() == ']') - ADVANCE(7); - LEX_ERROR(2, EXPECT({",", "]"})); - case 7: + ADVANCE(6); + LEX_ERROR(); + case 6: ACCEPT_TOKEN(ts_sym_right_bracket); - case 8: + case 7: if (LOOKAHEAD_CHAR() == ']') - ADVANCE(7); - LEX_ERROR(1, EXPECT({"]"})); - case 9: + ADVANCE(6); + LEX_ERROR(); + case 8: if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(10); + ADVANCE(9); if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(16); + ADVANCE(15); if (LOOKAHEAD_CHAR() == '[') - ADVANCE(17); + ADVANCE(16); if (LOOKAHEAD_CHAR() == '{') - ADVANCE(18); - LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"})); + ADVANCE(17); + LEX_ERROR(); + case 9: + if (!((LOOKAHEAD_CHAR() == '\"') || + (LOOKAHEAD_CHAR() == '\\'))) + ADVANCE(10); + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(11); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(12); + if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') + ADVANCE(14); + LEX_ERROR(); case 10: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(11); + ADVANCE(10); if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(12); + ADVANCE(11); if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(13); + ADVANCE(12); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(15); - LEX_ERROR(1, EXPECT({""})); + ADVANCE(14); + LEX_ERROR(); case 11: + ACCEPT_TOKEN(ts_sym_string); + case 12: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(11); + ADVANCE(10); if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(12); - if (LOOKAHEAD_CHAR() == '\\') ADVANCE(13); + if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') + ADVANCE(10); + if (LOOKAHEAD_CHAR() == '\\') + ADVANCE(12); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(15); - LEX_ERROR(1, EXPECT({""})); - case 12: - ACCEPT_TOKEN(ts_sym_string); + ADVANCE(14); + LEX_ERROR(); case 13: if (!((LOOKAHEAD_CHAR() == '\"') || (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(11); + ADVANCE(10); if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(14); - if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"') ADVANCE(11); if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(13); - if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(15); - LEX_ERROR(1, EXPECT({""})); - case 14: - if (!((LOOKAHEAD_CHAR() == '\"') || - (LOOKAHEAD_CHAR() == '\\'))) - ADVANCE(11); - if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); - if (LOOKAHEAD_CHAR() == '\\') - ADVANCE(13); if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\') - ADVANCE(15); + ADVANCE(14); ACCEPT_TOKEN(ts_sym_string); - case 15: + case 14: if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(11); - LEX_ERROR(1, EXPECT({"\""})); - case 16: + ADVANCE(10); + LEX_ERROR(); + case 15: if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(16); + ADVANCE(15); ACCEPT_TOKEN(ts_sym_number); - case 17: + case 16: ACCEPT_TOKEN(ts_sym_left_bracket); - case 18: + case 17: ACCEPT_TOKEN(ts_sym_left_brace); - case 19: + case 18: if (LOOKAHEAD_CHAR() == ':') - ADVANCE(20); - LEX_ERROR(1, EXPECT({":"})); - case 20: + ADVANCE(19); + LEX_ERROR(); + case 19: ACCEPT_TOKEN(ts_sym_colon); + case 20: + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(9); + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(3); + LEX_ERROR(); case 21: if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(10); - if (LOOKAHEAD_CHAR() == '}') - ADVANCE(4); - LEX_ERROR(2, EXPECT({"\"", "}"})); + ADVANCE(9); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(15); + if (LOOKAHEAD_CHAR() == '[') + ADVANCE(16); + if (LOOKAHEAD_CHAR() == ']') + ADVANCE(6); + if (LOOKAHEAD_CHAR() == '{') + ADVANCE(17); + LEX_ERROR(); case 22: if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(10); - if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') - ADVANCE(16); - if (LOOKAHEAD_CHAR() == '[') - ADVANCE(17); - if (LOOKAHEAD_CHAR() == ']') - ADVANCE(7); - if (LOOKAHEAD_CHAR() == '{') - ADVANCE(18); - LEX_ERROR(5, EXPECT({"\"", "0-9", "[", "]", "{"})); + ADVANCE(9); + LEX_ERROR(); case 23: + ACCEPT_TOKEN(ts_sym_comma); + case 24: + ACCEPT_TOKEN(ts_sym_colon); + case 25: + ACCEPT_TOKEN(ts_sym_left_bracket); + case 26: + ACCEPT_TOKEN(ts_sym_right_bracket); + case 27: + ACCEPT_TOKEN(ts_sym_left_brace); + case 28: + ACCEPT_TOKEN(ts_sym_right_brace); + case ts_lex_state_error: if (LOOKAHEAD_CHAR() == '\"') - ADVANCE(10); - LEX_ERROR(1, EXPECT({"\""})); + ADVANCE(9); + if (LOOKAHEAD_CHAR() == ',') + ADVANCE(23); + if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9') + ADVANCE(15); + if (LOOKAHEAD_CHAR() == ':') + ADVANCE(24); + if (LOOKAHEAD_CHAR() == '[') + ADVANCE(25); + if (LOOKAHEAD_CHAR() == ']') + ADVANCE(26); + if (LOOKAHEAD_CHAR() == '{') + ADVANCE(27); + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(28); + LEX_ERROR(); default: LEX_PANIC(); } @@ -177,7 +203,7 @@ PARSE_FN() { START_PARSER(); switch (PARSE_STATE()) { case 0: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(1); @@ -194,7 +220,7 @@ PARSE_FN() { case ts_sym_left_bracket: SHIFT(44); default: - PARSE_PANIC(); + PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket})); } case 1: SET_LEX_STATE(0); @@ -202,7 +228,7 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_value, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 2: SET_LEX_STATE(0); @@ -210,28 +236,28 @@ PARSE_FN() { case ts_aux_sym_end: ACCEPT_INPUT(); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 3: - SET_LEX_STATE(21); + SET_LEX_STATE(20); switch (LOOKAHEAD_SYM()) { case ts_sym_string: SHIFT(4); case ts_sym_right_brace: SHIFT(43); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_string, ts_sym_right_brace})); } case 4: - SET_LEX_STATE(19); + SET_LEX_STATE(18); switch (LOOKAHEAD_SYM()) { case ts_sym_colon: SHIFT(5); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_colon})); } case 5: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(6); @@ -248,20 +274,20 @@ PARSE_FN() { case ts_sym_left_bracket: SHIFT(19); default: - PARSE_PANIC(); + PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket})); } case 6: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_value, 1, COLLAPSE({0})); case ts_sym_right_brace: REDUCE(ts_sym_value, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace})); } case 7: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(8); @@ -270,26 +296,26 @@ PARSE_FN() { case ts_aux_sym_repeat_helper2: SHIFT(41); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2})); } case 8: - SET_LEX_STATE(23); + SET_LEX_STATE(22); switch (LOOKAHEAD_SYM()) { case ts_sym_string: SHIFT(9); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_string})); } case 9: - SET_LEX_STATE(19); + SET_LEX_STATE(18); switch (LOOKAHEAD_SYM()) { case ts_sym_colon: SHIFT(10); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_colon})); } case 10: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(6); @@ -306,10 +332,10 @@ PARSE_FN() { case ts_sym_left_bracket: SHIFT(19); default: - PARSE_PANIC(); + PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket})); } case 11: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(8); @@ -318,36 +344,36 @@ PARSE_FN() { case ts_aux_sym_repeat_helper2: SHIFT(12); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2})); } case 12: - SET_LEX_STATE(5); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_sym_right_brace: REDUCE(ts_aux_sym_repeat_helper2, 5, COLLAPSE({1, 0, 1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_brace})); } case 13: - SET_LEX_STATE(21); + SET_LEX_STATE(20); switch (LOOKAHEAD_SYM()) { case ts_sym_string: SHIFT(14); case ts_sym_right_brace: SHIFT(40); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_string, ts_sym_right_brace})); } case 14: - SET_LEX_STATE(19); + SET_LEX_STATE(18); switch (LOOKAHEAD_SYM()) { case ts_sym_colon: SHIFT(15); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_colon})); } case 15: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(6); @@ -364,10 +390,10 @@ PARSE_FN() { case ts_sym_left_bracket: SHIFT(19); default: - PARSE_PANIC(); + PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket})); } case 16: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(8); @@ -376,28 +402,28 @@ PARSE_FN() { case ts_aux_sym_repeat_helper2: SHIFT(17); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2})); } case 17: - SET_LEX_STATE(5); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_sym_right_brace: SHIFT(18); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_brace})); } case 18: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); case ts_sym_right_brace: REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace})); } case 19: - SET_LEX_STATE(22); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(20); @@ -416,20 +442,20 @@ PARSE_FN() { case ts_sym_right_bracket: SHIFT(39); default: - PARSE_PANIC(); + PARSE_ERROR(8, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket, ts_sym_right_bracket})); } case 20: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_value, 1, COLLAPSE({0})); case ts_sym_right_bracket: REDUCE(ts_sym_value, 1, COLLAPSE({0})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket})); } case 21: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(22); @@ -438,10 +464,10 @@ PARSE_FN() { case ts_aux_sym_repeat_helper1: SHIFT(37); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1})); } case 22: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(20); @@ -458,10 +484,10 @@ PARSE_FN() { case ts_sym_left_bracket: SHIFT(32); default: - PARSE_PANIC(); + PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket})); } case 23: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(22); @@ -470,36 +496,36 @@ PARSE_FN() { case ts_aux_sym_repeat_helper1: SHIFT(24); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1})); } case 24: - SET_LEX_STATE(8); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_sym_right_bracket: REDUCE(ts_aux_sym_repeat_helper1, 3, COLLAPSE({1, 0, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_bracket})); } case 25: - SET_LEX_STATE(21); + SET_LEX_STATE(20); switch (LOOKAHEAD_SYM()) { case ts_sym_string: SHIFT(26); case ts_sym_right_brace: SHIFT(31); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_string, ts_sym_right_brace})); } case 26: - SET_LEX_STATE(19); + SET_LEX_STATE(18); switch (LOOKAHEAD_SYM()) { case ts_sym_colon: SHIFT(27); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_colon})); } case 27: - SET_LEX_STATE(9); + SET_LEX_STATE(8); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(6); @@ -516,10 +542,10 @@ PARSE_FN() { case ts_sym_left_bracket: SHIFT(19); default: - PARSE_PANIC(); + PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket})); } case 28: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(8); @@ -528,38 +554,38 @@ PARSE_FN() { case ts_aux_sym_repeat_helper2: SHIFT(29); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2})); } case 29: - SET_LEX_STATE(5); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_sym_right_brace: SHIFT(30); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_brace})); } case 30: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); case ts_sym_right_bracket: REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket})); } case 31: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_object, 2, COLLAPSE({1, 1})); case ts_sym_right_bracket: REDUCE(ts_sym_object, 2, COLLAPSE({1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket})); } case 32: - SET_LEX_STATE(22); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(20); @@ -578,10 +604,10 @@ PARSE_FN() { case ts_sym_right_bracket: SHIFT(36); default: - PARSE_PANIC(); + PARSE_ERROR(8, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket, ts_sym_right_bracket})); } case 33: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(22); @@ -590,81 +616,81 @@ PARSE_FN() { case ts_aux_sym_repeat_helper1: SHIFT(34); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1})); } case 34: - SET_LEX_STATE(8); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_sym_right_bracket: SHIFT(35); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_bracket})); } case 35: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1})); case ts_sym_right_bracket: REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket})); } case 36: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_array, 2, COLLAPSE({1, 1})); case ts_sym_right_bracket: REDUCE(ts_sym_array, 2, COLLAPSE({1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket})); } case 37: - SET_LEX_STATE(8); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_sym_right_bracket: SHIFT(38); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_bracket})); } case 38: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1})); case ts_sym_right_brace: REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace})); } case 39: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_array, 2, COLLAPSE({1, 1})); case ts_sym_right_brace: REDUCE(ts_sym_array, 2, COLLAPSE({1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace})); } case 40: - SET_LEX_STATE(2); + SET_LEX_STATE(1); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: REDUCE(ts_sym_object, 2, COLLAPSE({1, 1})); case ts_sym_right_brace: REDUCE(ts_sym_object, 2, COLLAPSE({1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace})); } case 41: - SET_LEX_STATE(5); + SET_LEX_STATE(4); switch (LOOKAHEAD_SYM()) { case ts_sym_right_brace: SHIFT(42); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_brace})); } case 42: SET_LEX_STATE(0); @@ -672,7 +698,7 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 43: SET_LEX_STATE(0); @@ -680,10 +706,10 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_object, 2, COLLAPSE({1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 44: - SET_LEX_STATE(22); + SET_LEX_STATE(21); switch (LOOKAHEAD_SYM()) { case ts_sym_array: SHIFT(20); @@ -702,10 +728,10 @@ PARSE_FN() { case ts_sym_right_bracket: SHIFT(48); default: - PARSE_PANIC(); + PARSE_ERROR(8, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket, ts_sym_right_bracket})); } case 45: - SET_LEX_STATE(6); + SET_LEX_STATE(5); switch (LOOKAHEAD_SYM()) { case ts_sym_comma: SHIFT(22); @@ -714,15 +740,15 @@ PARSE_FN() { case ts_aux_sym_repeat_helper1: SHIFT(46); default: - PARSE_PANIC(); + PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1})); } case 46: - SET_LEX_STATE(8); + SET_LEX_STATE(7); switch (LOOKAHEAD_SYM()) { case ts_sym_right_bracket: SHIFT(47); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_sym_right_bracket})); } case 47: SET_LEX_STATE(0); @@ -730,7 +756,7 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } case 48: SET_LEX_STATE(0); @@ -738,7 +764,7 @@ PARSE_FN() { case ts_aux_sym_end: REDUCE(ts_sym_array, 2, COLLAPSE({1, 1})); default: - PARSE_PANIC(); + PARSE_ERROR(1, EXPECT({ts_aux_sym_end})); } default: PARSE_PANIC(); diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 9aef2e17..08a20296 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -12,7 +12,7 @@ extern "C" { //#define TS_DEBUG_PARSE //#define TS_DEBUG_LEX - + #ifdef TS_DEBUG_LEX #define DEBUG_LEX(...) fprintf(stderr, __VA_ARGS__) #else @@ -27,8 +27,9 @@ extern "C" { static int INITIAL_STACK_SIZE = 100; static const char *ts_symbol_names[]; - + typedef int ts_state; +static const ts_state ts_lex_state_error = -1; typedef struct { ts_state state; @@ -37,15 +38,18 @@ typedef struct { typedef struct { const char *input; + int error_mode; size_t position; ts_tree *lookahead_node; ts_tree *prev_lookahead_node; ts_state lex_state; ts_stack_entry *stack; size_t stack_size; - ts_parse_result result; + ts_tree *result; } ts_parser; +static void ts_lex(ts_parser *parser); + static ts_parser ts_parser_make(const char *input) { ts_parser result = { .input = input, @@ -54,13 +58,7 @@ static ts_parser ts_parser_make(const char *input) { .lex_state = 0, .stack = calloc(INITIAL_STACK_SIZE, sizeof(ts_stack_entry)), .stack_size = 0, - .result = { - .tree = NULL, - .error = { - .expected_inputs = NULL, - .expected_input_count = 0 - }, - }, + .result = NULL, }; return result; } @@ -69,9 +67,9 @@ static char ts_parser_lookahead_char(const ts_parser *parser) { return parser->input[parser->position]; } -static long ts_parser_lookahead_sym(const ts_parser *parser) { +static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) { ts_tree *node = parser->lookahead_node; - return node ? node->value : -1; + return node ? node->symbol : ts_symbol_error; } static ts_state ts_parser_parse_state(const ts_parser *parser) { @@ -96,7 +94,7 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_ for (int i = 0; i < immediate_child_count; i++) { ts_tree *child = parser->stack[parser->stack_size + i].node; if (collapse_flags[i]) { - total_child_count += child->child_count; + total_child_count += ts_tree_child_count(child); } else { total_child_count++; } @@ -107,8 +105,11 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_ for (int i = 0; i < immediate_child_count; i++) { ts_tree *child = parser->stack[parser->stack_size + i].node; if (collapse_flags[i]) { - memcpy(children + n, child->children, (child->child_count * sizeof(ts_tree *))); - n += child->child_count; + size_t grandchild_count = ts_tree_child_count(child); + if (grandchild_count > 0) { + memcpy(children + n, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *))); + n += grandchild_count; + } } else { children[n] = child; n++; @@ -116,22 +117,10 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_ } parser->prev_lookahead_node = parser->lookahead_node; - parser->lookahead_node = ts_tree_make(symbol, total_child_count, children); + parser->lookahead_node = ts_tree_make_node(symbol, total_child_count, children); DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser)); } -static void ts_parser_set_error(ts_parser *parser, size_t count, const char **expected_inputs) { - ts_error *error = &parser->result.error; - error->position = parser->position; - error->lookahead_char = ts_parser_lookahead_char(parser); - error->expected_input_count = count; - error->expected_inputs = expected_inputs; -} - -static int ts_parser_has_error(const ts_parser *parser) { - return (parser->result.error.expected_inputs != NULL); -} - static void ts_parser_advance(ts_parser *parser, ts_state lex_state) { DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser)); parser->position++; @@ -140,18 +129,28 @@ static void ts_parser_advance(ts_parser *parser, ts_state lex_state) { static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) { DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); - parser->lookahead_node = ts_tree_make(symbol, 0, NULL); + parser->lookahead_node = ts_tree_make_leaf(symbol); } -static void ts_parser_accept_input(ts_parser *parser) { - parser->result.tree = parser->stack[parser->stack_size - 1].node; +static ts_tree * ts_parser_tree(ts_parser *parser) { DEBUG_PARSE("accept \n"); + return parser->stack[0].node; } static void ts_parser_skip_whitespace(ts_parser *parser) { - while (isspace(parser->input[parser->position])) + while (isspace(ts_parser_lookahead_char(parser))) parser->position++; } + +static void ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) { + if (parser->error_mode) { + parser->lex_state = ts_lex_state_error; + ts_lex(parser); + } else { + parser->error_mode = 1; + parser->lookahead_node = ts_tree_make_error(ts_parser_lookahead_char(parser), count, expected_symbols); + } +} #pragma mark - DSL @@ -159,7 +158,7 @@ static void ts_parser_skip_whitespace(ts_parser *parser) { static void ts_lex(ts_parser *parser) #define PARSE_FN() \ -static ts_parse_result ts_parse(const char *input) +static const ts_tree * ts_parse(const char *input) #define SYMBOL_NAMES \ static const char *ts_symbol_names[] = @@ -175,6 +174,9 @@ ts_parser p = ts_parser_make(input), *parser = &p; \ next_state: #define START_LEXER() \ +if (ts_parser_lookahead_char(parser) == '\0') { \ + ACCEPT_TOKEN(ts_aux_sym_end) \ +} \ ts_parser_skip_whitespace(parser); \ next_state: @@ -194,7 +196,6 @@ parser->lex_state { \ parser->lex_state = state_index; \ if (LOOKAHEAD_SYM() < 0) ts_lex(parser); \ - if (ts_parser_has_error(parser)) goto done; \ } #define SHIFT(state) \ @@ -211,30 +212,33 @@ goto next_state; \ } #define ACCEPT_INPUT() \ -{ ts_parser_accept_input(parser); goto done; } +{ goto done; } #define ACCEPT_TOKEN(symbol) \ { ts_parser_set_lookahead_sym(parser, symbol); goto done; } -#define LEX_ERROR(count, inputs) \ +#define LEX_ERROR() \ +{ ts_parser_set_lookahead_sym(parser, -1); goto done; } + +#define PARSE_ERROR(count, inputs) \ { \ -static const char *expected_inputs[] = inputs; \ -ts_parser_set_error(parser, count, expected_inputs); \ -goto done; \ +static const ts_symbol expected_inputs[] = inputs; \ +ts_parser_handle_error(parser, count, expected_inputs); \ +goto next_state; \ } #define LEX_PANIC() \ -printf("Lex error: unexpected state %ud", LEX_STATE()); +printf("Lex error: unexpected state %d", LEX_STATE()); #define PARSE_PANIC() \ -printf("Parse error: unexpected state %ud", PARSE_STATE()); +printf("Parse error: unexpected state %d", PARSE_STATE()); #define EXPECT(...) __VA_ARGS__ #define COLLAPSE(...) __VA_ARGS__ #define FINISH_PARSER() \ done: \ -return parser->result; +return ts_parser_tree(parser); #define FINISH_LEXER() \ done: diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index eba8e7c3..3930769a 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -7,36 +7,37 @@ extern "C" { #include -typedef struct { - size_t position; - char lookahead_char; - size_t expected_input_count; - const char **expected_inputs; -} ts_error; - -const char * ts_error_string(const ts_error *error); - -typedef size_t ts_symbol; +typedef int ts_symbol; +extern const ts_symbol ts_symbol_error; typedef struct ts_tree { - ts_symbol value; - struct ts_tree **children; - size_t child_count; + ts_symbol symbol; size_t ref_count; + union { + struct { + size_t count; + struct ts_tree **contents; + } children; + struct { + char lookahead_char; + size_t expected_input_count; + const ts_symbol *expected_inputs; + } error; + } data; } ts_tree; -ts_tree * ts_tree_make(ts_symbol value, size_t child_count, ts_tree **children); +ts_tree * ts_tree_make_leaf(ts_symbol symbol); +ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children); +ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs); void ts_tree_retain(ts_tree *tree); void ts_tree_release(ts_tree *tree); int ts_tree_equals(const ts_tree *tree1, const ts_tree *tree2); char * ts_tree_string(const ts_tree *tree, const char **names); +char * ts_tree_error_string(const ts_tree *tree, const char **names); +size_t ts_tree_child_count(const ts_tree *tree); +ts_tree ** ts_tree_children(const ts_tree *tree); -typedef struct { - ts_error error; - ts_tree *tree; -} ts_parse_result; - -typedef ts_parse_result ts_parse_fn(const char *); +typedef const ts_tree * ts_parse_fn(const char *); typedef struct { ts_parse_fn *parse_fn; @@ -49,7 +50,7 @@ ts_document * ts_document_make(); void ts_document_free(ts_document *); void ts_document_set_parser(ts_document *document, ts_parse_config config); void ts_document_set_text(ts_document *document, const char *text); -ts_tree * ts_document_tree(const ts_document *document); +const ts_tree * ts_document_tree(const ts_document *document); const char * ts_document_string(const ts_document *document); #ifdef __cplusplus diff --git a/spec/compiler/build_tables/perform_spec.cpp b/spec/compiler/build_tables/perform_spec.cpp index 12539c49..20e1a196 100644 --- a/spec/compiler/build_tables/perform_spec.cpp +++ b/spec/compiler/build_tables/perform_spec.cpp @@ -59,7 +59,7 @@ describe("building parse and lex tables", []() { }; function lex_state = [&](size_t parse_state_index) { - long index = table.states[parse_state_index].lex_state_index; + long index = table.states[parse_state_index].lex_state_id; return lex_table.states[index]; }; diff --git a/spec/runtime/json_spec.cpp b/spec/runtime/json_spec.cpp index 5ef47abb..f19d590e 100644 --- a/spec/runtime/json_spec.cpp +++ b/spec/runtime/json_spec.cpp @@ -5,48 +5,48 @@ extern ts_parse_config ts_parse_config_json; START_TEST describe("json", []() { - ts_document *document; + ts_document *doc; before_each([&]() { - document = ts_document_make(); - ts_document_set_parser(document, ts_parse_config_json); + doc = ts_document_make(); + ts_document_set_parser(doc, ts_parse_config_json); }); after_each([&]() { - ts_document_free(document); + ts_document_free(doc); }); it("parses strings", [&]() { - ts_document_set_text(document, "\"\""); - AssertThat(string(ts_document_string(document)), Equals("(value (string))")); + ts_document_set_text(doc, "\"\""); + AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); - ts_document_set_text(document, "\"simple-string\""); - AssertThat(string(ts_document_string(document)), Equals("(value (string))")); + ts_document_set_text(doc, "\"simple-string\""); + AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); - ts_document_set_text(document, "\"this is a \\\"string\\\" within a string\""); - AssertThat(string(ts_document_string(document)), Equals("(value (string))")); + ts_document_set_text(doc, "\"this is a \\\"string\\\" within a string\""); + AssertThat(string(ts_document_string(doc)), Equals("(value (string))")); }); it("parses objects", [&]() { - ts_document_set_text(document, "{}"); - AssertThat(string(ts_document_string(document)), Equals("(value (object))")); + ts_document_set_text(doc, "{}"); + AssertThat(string(ts_document_string(doc)), Equals("(value (object))")); - ts_document_set_text(document, "{ \"key1\": 1 }"); - AssertThat(string(ts_document_string(document)), Equals("(value (object (string) (value (number))))")); + ts_document_set_text(doc, "{ \"key1\": 1 }"); + AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number))))")); - ts_document_set_text(document, "{\"key1\": 1, \"key2\": 2 }"); - AssertThat(string(ts_document_string(document)), Equals("(value (object (string) (value (number)) (string) (value (number))))")); + ts_document_set_text(doc, "{\"key1\": 1, \"key2\": 2 }"); + AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number)) (string) (value (number))))")); }); it("parses arrays", [&]() { - ts_document_set_text(document, "[]"); - AssertThat(string(ts_document_string(document)), Equals("(value (array))")); + ts_document_set_text(doc, "[]"); + AssertThat(string(ts_document_string(doc)), Equals("(value (array))")); - ts_document_set_text(document, "[5]"); - AssertThat(string(ts_document_string(document)), Equals("(value (array (value (number))))")); + ts_document_set_text(doc, "[5]"); + AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number))))")); - ts_document_set_text(document, "[1, 2, 3]"); - AssertThat(string(ts_document_string(document)), Equals("(value (array (value (number)) (value (number)) (value (number))))")); + ts_document_set_text(doc, "[1, 2, 3]"); + AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))")); }); }); diff --git a/spec/runtime/tree_spec.cpp b/spec/runtime/tree_spec.cpp index 321dc2fc..b87d5b20 100644 --- a/spec/runtime/tree_spec.cpp +++ b/spec/runtime/tree_spec.cpp @@ -9,8 +9,8 @@ describe("trees", []() { ts_tree *tree1, *parent1; before_each([&]() { - tree1 = ts_tree_make(cat, 0, NULL); - parent1 = ts_tree_make(dog, 1, &tree1); + tree1 = ts_tree_make_leaf(cat); + parent1 = ts_tree_make_node(dog, 1, &tree1); }); after_each([&]() { @@ -20,10 +20,10 @@ describe("trees", []() { describe("equality", [&]() { it("returns true for identical trees", [&]() { - ts_tree *tree2 = ts_tree_make(cat, 0, NULL); + ts_tree *tree2 = ts_tree_make_leaf(cat); AssertThat(ts_tree_equals(tree1, tree2), Equals(1)); - ts_tree *parent2 = ts_tree_make(dog, 1, &tree2); + ts_tree *parent2 = ts_tree_make_node(dog, 1, &tree2); AssertThat(ts_tree_equals(parent1, parent2), Equals(1)); ts_tree_release(tree2); @@ -31,13 +31,13 @@ describe("trees", []() { }); it("returns false for different trees", [&]() { - ts_tree *different_tree = ts_tree_make(pig, 0, NULL); + ts_tree *different_tree = ts_tree_make_leaf(pig); AssertThat(ts_tree_equals(tree1, different_tree), Equals(0)); - ts_tree *different_parent = ts_tree_make(dog, 1, &different_tree); + ts_tree *different_parent = ts_tree_make_node(dog, 1, &different_tree); AssertThat(ts_tree_equals(parent1, different_parent), Equals(0)); - ts_tree *parent_with_same_type = ts_tree_make(cat, 1, &different_parent); + ts_tree *parent_with_same_type = ts_tree_make_node(cat, 1, &different_parent); AssertThat(ts_tree_equals(parent_with_same_type, tree1), Equals(0)); AssertThat(ts_tree_equals(tree1, parent_with_same_type), Equals(0)); diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index 5987ecb2..c8181de4 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -14,105 +14,111 @@ namespace tree_sitter { using rules::CharacterSet; namespace build_tables { - static int NOT_FOUND = -1; + static int NOT_FOUND = -2; static Symbol START("start", rules::SymbolTypeAuxiliary); static Symbol END_OF_INPUT("end", rules::SymbolTypeAuxiliary); class TableBuilder { const PreparedGrammar grammar; const PreparedGrammar lex_grammar; - map parse_state_indices; - map lex_state_indices; + map parse_state_ids; + map lex_state_ids; ParseTable parse_table; LexTable lex_table; - long parse_state_index_for_item_set(const ParseItemSet &item_set) const { - auto entry = parse_state_indices.find(item_set); - return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second; + long parse_state_id_for_item_set(const ParseItemSet &item_set) const { + auto entry = parse_state_ids.find(item_set); + return (entry == parse_state_ids.end()) ? NOT_FOUND : entry->second; } - long lex_state_index_for_item_set(const LexItemSet &item_set) const { - auto entry = lex_state_indices.find(item_set); - return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second; + long lex_state_id_for_item_set(const LexItemSet &item_set) const { + auto entry = lex_state_ids.find(item_set); + return (entry == lex_state_ids.end()) ? NOT_FOUND : entry->second; } - void add_shift_actions(const ParseItemSet &item_set, size_t state_index) { + void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (auto transition : sym_transitions(item_set, grammar)) { Symbol symbol = transition.first; ParseItemSet item_set = transition.second; - size_t new_state_index = add_parse_state(item_set); - parse_table.add_action(state_index, symbol, ParseAction::Shift(new_state_index)); + ParseStateId new_state_id = add_parse_state(item_set); + parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id)); } } - void add_advance_actions(const LexItemSet &item_set, size_t state_index) { + void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { for (auto transition : char_transitions(item_set, grammar)) { CharacterSet rule = transition.first; LexItemSet item_set = transition.second; - size_t new_state_index = add_lex_state(item_set); - lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index)); + LexStateId new_state_id = add_lex_state(item_set); + lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id)); } } - void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) { + void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) { for (LexItem item : item_set) { if (item.is_done()) { - lex_table.add_default_action(state_index, LexAction::Accept(item.lhs)); + lex_table.add_default_action(state_id, LexAction::Accept(item.lhs)); } } } - void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) { + void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (ParseItem item : item_set) { if (item.is_done()) { ParseAction action = (item.lhs == START) ? ParseAction::Accept() : ParseAction::Reduce(item.lhs, item.consumed_symbols); - parse_table.add_action(state_index, item.lookahead_sym, action); + parse_table.add_action(state_id, item.lookahead_sym, action); } } } - void assign_lex_state(size_t state_index) { - ParseState &state = parse_table.states[state_index]; + void assign_lex_state(ParseStateId state_id) { + ParseState &state = parse_table.states[state_id]; LexItemSet item_set; for (auto &symbol : state.expected_inputs()) { - if (symbol == END_OF_INPUT) - item_set.insert(LexItem(symbol, make_shared(std::set{ '\0' }))); if (lex_grammar.has_definition(symbol)) item_set.insert(LexItem(symbol, lex_grammar.rule(symbol))); } - state.lex_state_index = add_lex_state(item_set); + state.lex_state_id = add_lex_state(item_set); } - size_t add_lex_state(const LexItemSet &item_set) { - auto state_index = lex_state_index_for_item_set(item_set); - if (state_index == NOT_FOUND) { - state_index = lex_table.add_state(); - lex_state_indices[item_set] = state_index; - add_advance_actions(item_set, state_index); - add_accept_token_actions(item_set, state_index); + LexStateId add_lex_state(const LexItemSet &item_set) { + auto state_id = lex_state_id_for_item_set(item_set); + if (state_id == NOT_FOUND) { + state_id = lex_table.add_state(); + lex_state_ids[item_set] = state_id; + add_advance_actions(item_set, state_id); + add_accept_token_actions(item_set, state_id); } - return state_index; + return state_id; } - size_t add_parse_state(const ParseItemSet &item_set) { - auto state_index = parse_state_index_for_item_set(item_set); - if (state_index == NOT_FOUND) { - state_index = parse_table.add_state(); - parse_state_indices[item_set] = state_index; + ParseStateId add_parse_state(const ParseItemSet &item_set) { + auto state_id = parse_state_id_for_item_set(item_set); + if (state_id == NOT_FOUND) { + state_id = parse_table.add_state(); + parse_state_ids[item_set] = state_id; - add_shift_actions(item_set, state_index); - add_reduce_actions(item_set, state_index); - assign_lex_state(state_index); + add_shift_actions(item_set, state_id); + add_reduce_actions(item_set, state_id); + assign_lex_state(state_id); } - return state_index; + return state_id; + } + + void add_error_lex_state() { + LexItemSet error_item_set; + for (auto &pair : lex_grammar.rules) + error_item_set.insert(LexItem(pair.first, pair.second)); + add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID); + add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID); } // void dump_item_sets() { -// std::vector item_sets(parse_state_indices.size()); -// for (auto &pair : parse_state_indices) +// std::vector item_sets(parse_state_ids.size()); +// for (auto &pair : parse_state_ids) // item_sets[pair.second] = &pair.first; // // for (int i = 0; i < item_sets.size(); i++) { @@ -135,6 +141,7 @@ namespace tree_sitter { auto item = ParseItem(START, make_shared(grammar.start_rule_name), {}, END_OF_INPUT); ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar); add_parse_state(item_set); + add_error_lex_state(); return pair(parse_table, lex_table); } }; diff --git a/src/compiler/generate_code/c_code.cpp b/src/compiler/generate_code/c_code.cpp index fd56979c..fa6a9584 100644 --- a/src/compiler/generate_code/c_code.cpp +++ b/src/compiler/generate_code/c_code.cpp @@ -164,17 +164,13 @@ namespace tree_sitter { return input; } - string lex_error_call(const set &expected_inputs) { - rules::CharacterSet expected_set; - for (auto &rule : expected_inputs) - expected_set.add_set(rule); - - string result = "LEX_ERROR(" + to_string(expected_set.ranges.size()) + ", EXPECT({"; + string parse_error_call(const set &expected_inputs) { + string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({"; bool started = false; - for (auto &range : expected_set.ranges) { + for (auto &symbol : expected_inputs) { if (started) result += ", "; started = true; - result += "\"" + escape_string(range.to_string()) + "\""; + result += symbol_id(symbol); } result += "}));"; return result; @@ -183,7 +179,7 @@ namespace tree_sitter { string code_for_lex_actions(const set &actions, const set &expected_inputs) { auto action = actions.begin(); if (action == actions.end()) { - return lex_error_call(expected_inputs); + return "LEX_ERROR();"; } else { switch (action->type) { case LexActionTypeAdvance: @@ -198,11 +194,12 @@ namespace tree_sitter { string code_for_parse_state(const ParseState &parse_state) { string body = ""; + auto expected_inputs = parse_state.expected_inputs(); for (auto pair : parse_state.actions) - body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second, parse_state.expected_inputs())); - body += _default("PARSE_PANIC();"); + body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second, expected_inputs)); + body += _default(parse_error_call(expected_inputs)); return - string("SET_LEX_STATE(") + to_string(parse_state.lex_state_index) + ");\n" + + string("SET_LEX_STATE(") + to_string(parse_state.lex_state_id) + ");\n" + _switch("LOOKAHEAD_SYM()", body); } @@ -227,6 +224,7 @@ namespace tree_sitter { string body = ""; for (int i = 0; i < lex_table.states.size(); i++) body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i])); + body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state)); body += _default("LEX_PANIC();"); return _switch("LEX_STATE()", body); } diff --git a/src/compiler/lex_table.cpp b/src/compiler/lex_table.cpp index 4e2df4d3..f839fe5d 100644 --- a/src/compiler/lex_table.cpp +++ b/src/compiler/lex_table.cpp @@ -58,16 +58,25 @@ namespace tree_sitter { return result; } - size_t LexTable::add_state() { + LexStateId LexTable::add_state() { states.push_back(LexState()); return states.size() - 1; } - void LexTable::add_action(size_t state_index, CharacterSet match, LexAction action) { - states[state_index].actions[match].insert(action); + LexState & state(LexTable *table, LexStateId id) { + if (id < 0) + return table->error_state; + else + return table->states[id]; } - void LexTable::add_default_action(size_t state_index, LexAction action) { - states[state_index].default_actions.insert(action); + void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) { + state(this, id).actions[match].insert(action); } + + void LexTable::add_default_action(LexStateId id, LexAction action) { + state(this, id).default_actions.insert(action); + } + + const LexStateId LexTable::ERROR_STATE_ID = -1; } \ No newline at end of file diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index 527fb836..6c637d74 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -51,13 +51,17 @@ namespace tree_sitter { std::set expected_inputs() const; }; + typedef long int LexStateId; + class LexTable { public: - size_t add_state(); - void add_action(size_t state_index, rules::CharacterSet rule, LexAction action); - void add_default_action(size_t state_index, LexAction action); + static const LexStateId ERROR_STATE_ID; + LexStateId add_state(); + void add_action(LexStateId state_id, rules::CharacterSet rule, LexAction action); + void add_default_action(LexStateId state_id, LexAction action); std::vector states; + LexState error_state; }; } diff --git a/src/compiler/parse_table.cpp b/src/compiler/parse_table.cpp index 36fc3008..77ec0df0 100644 --- a/src/compiler/parse_table.cpp +++ b/src/compiler/parse_table.cpp @@ -58,7 +58,7 @@ namespace tree_sitter { } } - ParseState::ParseState() : lex_state_index(-1) {} + ParseState::ParseState() : lex_state_id(-1) {} set ParseState::expected_inputs() const { set result; @@ -86,13 +86,13 @@ namespace tree_sitter { return stream; } - size_t ParseTable::add_state() { + ParseStateId ParseTable::add_state() { states.push_back(ParseState()); return states.size() - 1; } - void ParseTable::add_action(size_t state_index, Symbol symbol, ParseAction action) { + void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) { symbols.insert(symbol); - states[state_index].actions[symbol].insert(action); + states[id].actions[symbol].insert(action); } } diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index 823d36fd..0ffb32d4 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -5,6 +5,7 @@ #include #include #include "rules/symbol.h" +#include "./lex_table.h" namespace tree_sitter { typedef enum { @@ -52,15 +53,17 @@ namespace tree_sitter { ParseState(); std::map> actions; std::set expected_inputs() const; - size_t lex_state_index; + LexStateId lex_state_id; }; + typedef unsigned long int ParseStateId; + std::ostream& operator<<(std::ostream &stream, const ParseState &state); class ParseTable { public: size_t add_state(); - void add_action(size_t state_index, rules::Symbol symbol, ParseAction action); + void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action); std::vector states; std::set symbols; diff --git a/src/runtime/document.cpp b/src/runtime/document.cpp index c5fdeb35..0aed935b 100644 --- a/src/runtime/document.cpp +++ b/src/runtime/document.cpp @@ -3,8 +3,9 @@ struct ts_document { ts_parse_fn *parse_fn; const char **symbol_names; - ts_error error; - ts_tree *tree; + const ts_tree *tree; + size_t error_count; + ts_tree **errors; }; ts_document * ts_document_make() { @@ -21,18 +22,18 @@ void ts_document_set_parser(ts_document *document, ts_parse_config config) { } void ts_document_set_text(ts_document *document, const char *text) { - ts_parse_result result = document->parse_fn(text); - document->tree = result.tree; - document->error = result.error; + const ts_tree * result = document->parse_fn(text); + document->tree = result; + document->errors = NULL; } -ts_tree * ts_document_tree(const ts_document *document) { +const ts_tree * ts_document_tree(const ts_document *document) { return document->tree; } const char * ts_document_string(const ts_document *document) { - if (document->error.expected_inputs != NULL) { - return ts_error_string(&document->error); + if (document->error_count > 0) { + return ts_tree_error_string(document->errors[0], document->symbol_names); } else { return ts_tree_string(document->tree, document->symbol_names); } diff --git a/src/runtime/error.cpp b/src/runtime/error.cpp deleted file mode 100644 index e945cbb5..00000000 --- a/src/runtime/error.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "tree_sitter/runtime.h" -#include -#include "string.h" - -using std::string; - -const char * ts_error_string(const ts_error *error) { - string result = string("Unexpected character '") + error->lookahead_char + "'. Expected:"; - for (int i = 0; i < error->expected_input_count; i++) { - result += string(" ") + error->expected_inputs[i]; - } - - char *stuff = (char *)malloc(result.size() * sizeof(char)); - strcpy(stuff, result.c_str()); - return stuff; -} diff --git a/src/runtime/tree.cpp b/src/runtime/tree.cpp index 384ca8cc..05771569 100644 --- a/src/runtime/tree.cpp +++ b/src/runtime/tree.cpp @@ -3,18 +3,43 @@ #include using std::string; +using std::to_string; -ts_tree * ts_tree_make(ts_symbol value, size_t child_count, ts_tree **children) { +const ts_symbol ts_symbol_error = -1; + +ts_tree * ts_tree_make_leaf(ts_symbol symbol) { ts_tree *result = new ts_tree(); - result->value = value; - result->child_count = child_count; - result->children = children; result->ref_count = 0; + result->symbol = symbol; + result->data.children = { + .count = 0, + .contents = NULL + }; + return result; +} + +ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children) { + ts_tree *result = new ts_tree(); + result->ref_count = 0; + result->symbol = symbol; + result->data.children = { + .count = child_count, + .contents = children + }; for (int i = 0; i < child_count; i++) ts_tree_retain(children[i]); return result; } +ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs) { + ts_tree *result = new ts_tree(); + result->data.error = { + .lookahead_char = lookahead_char, + .expected_input_count = expected_input_count, + }; + return result; +} + void ts_tree_retain(ts_tree *tree) { tree->ref_count++; } @@ -22,28 +47,46 @@ void ts_tree_retain(ts_tree *tree) { void ts_tree_release(ts_tree *tree) { tree->ref_count--; if (tree->ref_count == 0) { - for (int i = 0; i < tree->child_count; i++) - ts_tree_release(tree->children[i]); + ts_tree **children = tree->data.children.contents; + for (int i = 0; i < ts_tree_child_count(tree); i++) + ts_tree_release(children[i]); +// free(children); free(tree); } } int ts_tree_equals(const ts_tree *node1, const ts_tree *node2) { - if (node1->value != node2->value) return 0; - if (node1->child_count != node2->child_count) return 0; - for (int i = 0; i < node1->child_count; i++) { - ts_tree *child1 = node1->children[i]; - ts_tree *child2 = node2->children[i]; - if (!ts_tree_equals(child1, child2)) return 0; + if (node1->symbol != node2->symbol) return 0; + if (node1->symbol == ts_symbol_error) { + // check error equality + } else { + if (node1->data.children.count != node2->data.children.count) + return 0; + for (int i = 0; i < node1->data.children.count; i++) { + ts_tree *child1 = node1->data.children.contents[i]; + ts_tree *child2 = node2->data.children.contents[i]; + if (!ts_tree_equals(child1, child2)) + return 0; + } } return 1; } +ts_tree ** ts_tree_children(const ts_tree *tree) { + if (tree->symbol == ts_symbol_error) return NULL; + return tree->data.children.contents; +} + +size_t ts_tree_child_count(const ts_tree *tree) { + if (tree->symbol == ts_symbol_error) return 0; + return tree->data.children.count; +} + static string __tree_to_string(const ts_tree *tree, const char **symbol_names) { if (!tree) return "#"; - string result = string("(") + symbol_names[tree->value]; - for (int i = 0; i < tree->child_count; i++) - result += " " + __tree_to_string(tree->children[i], symbol_names); + string result = string("(") + symbol_names[tree->symbol]; + for (int i = 0; i < tree->data.children.count; i++) + result += " " + __tree_to_string(tree->data.children.contents[i], symbol_names); return result + ")"; } @@ -53,3 +96,15 @@ char * ts_tree_string(const ts_tree *tree, const char **symbol_names) { strcpy(result, value.c_str()); return result; } + +char * ts_tree_error_string(const ts_tree *tree, const char **symbol_names) { + string result = string("Unexpected character '") + tree->data.error.lookahead_char + "'. Expected:"; + for (int i = 0; i < tree->data.error.expected_input_count; i++) { + ts_symbol symbol = tree->data.error.expected_inputs[i]; + result += string(" ") + symbol_names[symbol]; + } + + char *stuff = (char *)malloc(result.size() * sizeof(char)); + strcpy(stuff, result.c_str()); + return stuff; +} diff --git a/todo.md b/todo.md index caf610a8..a8953b46 100644 --- a/todo.md +++ b/todo.md @@ -2,6 +2,11 @@ TODO ==== ## batch parsing +- error handling: keep going when errors are encountered and put error nodes into the AST +- more example grammars: + - go + - javascript + - ruby - add comments to generated C code describing the generated tokens (regexp pattern) - fix any memory leaks - add special lexical behavior for indentation-aware languages diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index c6f5d1a2..6899a455 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -23,7 +23,6 @@ 127528B518AACB70006B682B /* rule_can_be_blank_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 127528B418AACB70006B682B /* rule_can_be_blank_spec.cpp */; }; 12AB465F188BD03E00DE79DF /* follow_sets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12AB465D188BD03E00DE79DF /* follow_sets.cpp */; }; 12AB4661188CB3A300DE79DF /* item_set_closure_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */; }; - 12BC470518822B27005AC502 /* error.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470318822A17005AC502 /* error.cpp */; }; 12BC470718830BC5005AC502 /* first_set_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470618830BC5005AC502 /* first_set_spec.cpp */; }; 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; 12E75A9A1891BF57001B8F10 /* json.c in Sources */ = {isa = PBXBuildFile; fileRef = 12E75A981891BF3B001B8F10 /* json.c */; }; @@ -117,7 +116,6 @@ 12AB465E188BD03E00DE79DF /* follow_sets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = follow_sets.h; sourceTree = ""; }; 12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_closure_spec.cpp; sourceTree = ""; }; 12AB4663188DCB9800DE79DF /* stream_methods.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = stream_methods.h; sourceTree = ""; }; - 12BC470318822A17005AC502 /* error.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = error.cpp; sourceTree = ""; }; 12BC470618830BC5005AC502 /* first_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = first_set_spec.cpp; sourceTree = ""; }; 12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = ""; }; 12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/compiler/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; }; @@ -395,7 +393,6 @@ isa = PBXGroup; children = ( 12EDCF8C187C6282005A7A07 /* document.cpp */, - 12BC470318822A17005AC502 /* error.cpp */, 12FD40DE1860064C0041A84E /* tree.cpp */, ); path = runtime; @@ -563,7 +560,6 @@ 12E75A9C1891C17D001B8F10 /* json_spec.cpp in Sources */, 12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */, 12FD40C2185EEB5E0041A84E /* main.cpp in Sources */, - 12BC470518822B27005AC502 /* error.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; };