Start work on error recovery

- In the runtime, make parse errors part of the parse tree
- Add an error state to lexers, in which they can accept any token
This commit is contained in:
Max Brunsfeld 2014-02-24 18:42:54 -08:00
parent 4520d6e1a2
commit e58a6d8ba7
18 changed files with 622 additions and 528 deletions

View file

@ -30,81 +30,82 @@ LEX_FN() {
START_LEXER();
switch (LEX_STATE()) {
case 0:
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(1, EXPECT({"<EOF>"}));
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_aux_sym_end);
case 2:
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(3);
LEX_ERROR(2, EXPECT({"<EOF>", "+"}));
case 3:
ADVANCE(2);
LEX_ERROR();
case 2:
ACCEPT_TOKEN(ts_sym_plus);
case 4:
case 3:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
LEX_ERROR(1, EXPECT({")"}));
case 5:
ADVANCE(4);
LEX_ERROR();
case 4:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 5:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(4);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(2);
LEX_ERROR();
case 6:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
ADVANCE(4);
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(7);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(3);
LEX_ERROR(2, EXPECT({")", "+"}));
ADVANCE(2);
LEX_ERROR();
case 7:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(8);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(3);
LEX_ERROR(1, EXPECT({")-+"}));
case 8:
ACCEPT_TOKEN(ts_sym_times);
case 9:
case 8:
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(9);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(10);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(12);
LEX_ERROR(4, EXPECT({"(", "0-9", "A-Z", "a-z"}));
case 10:
ADVANCE(11);
LEX_ERROR();
case 9:
ACCEPT_TOKEN(ts_aux_sym_token1);
case 11:
case 10:
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
ADVANCE(10);
ACCEPT_TOKEN(ts_sym_number);
case 12:
case 11:
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(12);
ADVANCE(11);
ACCEPT_TOKEN(ts_sym_variable);
case 13:
case 12:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
ADVANCE(4);
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(8);
LEX_ERROR(1, EXPECT({")-*"}));
case 14:
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
ADVANCE(7);
LEX_ERROR();
case 13:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(8);
ADVANCE(7);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(3);
LEX_ERROR(2, EXPECT({"<EOF>", "*-+"}));
case 15:
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
ADVANCE(2);
LEX_ERROR();
case 14:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(8);
LEX_ERROR(2, EXPECT({"<EOF>", "*"}));
ADVANCE(7);
LEX_ERROR();
case ts_lex_state_error:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(7);
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(2);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(10);
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(11);
LEX_ERROR();
default:
LEX_PANIC();
}
@ -115,7 +116,7 @@ PARSE_FN() {
START_PARSER();
switch (PARSE_STATE()) {
case 0:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(1);
@ -130,7 +131,7 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(49);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 1:
SET_LEX_STATE(0);
@ -138,10 +139,10 @@ PARSE_FN() {
case ts_aux_sym_end:
ACCEPT_INPUT();
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 2:
SET_LEX_STATE(14);
SET_LEX_STATE(13);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_term, 1, COLLAPSE({0}));
@ -150,10 +151,10 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_term, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_end}));
}
case 3:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_factor:
SHIFT(4);
@ -164,30 +165,30 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(6);
default:
PARSE_PANIC();
PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1}));
}
case 4:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0}));
case ts_aux_sym_end:
REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end}));
}
case 5:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end}));
}
case 6:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(7);
@ -202,28 +203,28 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 7:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(8);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 8:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end}));
}
case 9:
SET_LEX_STATE(7);
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_term, 1, COLLAPSE({0}));
@ -232,10 +233,10 @@ PARSE_FN() {
case ts_aux_sym_token2:
REDUCE(ts_sym_term, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_token2}));
}
case 10:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_factor:
SHIFT(11);
@ -246,30 +247,30 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(13);
default:
PARSE_PANIC();
PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1}));
}
case 11:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0}));
case ts_aux_sym_token2:
REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2}));
}
case 12:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2}));
}
case 13:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(14);
@ -284,28 +285,28 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 14:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(15);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 15:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2}));
}
case 16:
SET_LEX_STATE(7);
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
@ -314,20 +315,20 @@ PARSE_FN() {
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_token2}));
}
case 17:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
SHIFT(18);
case ts_aux_sym_token2:
REDUCE(ts_sym_expression, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_token2}));
}
case 18:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_factor:
SHIFT(19);
@ -340,20 +341,20 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(31);
default:
PARSE_PANIC();
PARSE_ERROR(5, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 19:
SET_LEX_STATE(13);
SET_LEX_STATE(12);
switch (LOOKAHEAD_SYM()) {
case ts_sym_times:
SHIFT(20);
case ts_aux_sym_token2:
REDUCE(ts_sym_term, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_token2}));
}
case 20:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_factor:
SHIFT(21);
@ -364,26 +365,26 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(23);
default:
PARSE_PANIC();
PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1}));
}
case 21:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 22:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 23:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(24);
@ -398,26 +399,26 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 24:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(25);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 25:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 26:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(27);
@ -432,18 +433,18 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 27:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(28);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 28:
SET_LEX_STATE(7);
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
@ -452,28 +453,28 @@ PARSE_FN() {
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_token2}));
}
case 29:
SET_LEX_STATE(13);
SET_LEX_STATE(12);
switch (LOOKAHEAD_SYM()) {
case ts_sym_times:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_token2}));
}
case 30:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
REDUCE(ts_sym_expression, 3, COLLAPSE({0, 0, 0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 31:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(32);
@ -488,28 +489,28 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 32:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(33);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 33:
SET_LEX_STATE(13);
SET_LEX_STATE(12);
switch (LOOKAHEAD_SYM()) {
case ts_sym_times:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
case ts_aux_sym_token2:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_token2}));
}
case 34:
SET_LEX_STATE(14);
SET_LEX_STATE(13);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
@ -518,20 +519,20 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_end}));
}
case 35:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
SHIFT(36);
case ts_aux_sym_end:
REDUCE(ts_sym_expression, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_plus, ts_aux_sym_end}));
}
case 36:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_factor:
SHIFT(37);
@ -544,20 +545,20 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(46);
default:
PARSE_PANIC();
PARSE_ERROR(5, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 37:
SET_LEX_STATE(15);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_sym_times:
SHIFT(38);
case ts_aux_sym_end:
REDUCE(ts_sym_term, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_end}));
}
case 38:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_factor:
SHIFT(39);
@ -568,7 +569,7 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(41);
default:
PARSE_PANIC();
PARSE_ERROR(4, EXPECT({ts_sym_factor, ts_sym_number, ts_sym_variable, ts_aux_sym_token1}));
}
case 39:
SET_LEX_STATE(0);
@ -576,7 +577,7 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_term, 3, COLLAPSE({0, 0, 0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 40:
SET_LEX_STATE(0);
@ -584,10 +585,10 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 41:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(42);
@ -602,15 +603,15 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 42:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(43);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 43:
SET_LEX_STATE(0);
@ -618,17 +619,17 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 44:
SET_LEX_STATE(15);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_sym_times:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_end}));
}
case 45:
SET_LEX_STATE(0);
@ -636,10 +637,10 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_expression, 3, COLLAPSE({0, 0, 0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 46:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(47);
@ -654,28 +655,28 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 47:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(48);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 48:
SET_LEX_STATE(15);
SET_LEX_STATE(14);
switch (LOOKAHEAD_SYM()) {
case ts_sym_times:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_times, ts_aux_sym_end}));
}
case 49:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_expression:
SHIFT(50);
@ -690,18 +691,18 @@ PARSE_FN() {
case ts_aux_sym_token1:
SHIFT(26);
default:
PARSE_PANIC();
PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1}));
}
case 50:
SET_LEX_STATE(4);
SET_LEX_STATE(3);
switch (LOOKAHEAD_SYM()) {
case ts_aux_sym_token2:
SHIFT(51);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_token2}));
}
case 51:
SET_LEX_STATE(14);
SET_LEX_STATE(13);
switch (LOOKAHEAD_SYM()) {
case ts_sym_plus:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
@ -710,7 +711,7 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_factor, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_plus, ts_sym_times, ts_aux_sym_end}));
}
default:
PARSE_PANIC();

View file

@ -38,135 +38,161 @@ LEX_FN() {
START_LEXER();
switch (LEX_STATE()) {
case 0:
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(1, EXPECT({"<EOF>"}));
LEX_ERROR();
case 1:
ACCEPT_TOKEN(ts_aux_sym_end);
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(2);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(3);
LEX_ERROR();
case 2:
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(3);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(4);
LEX_ERROR(2, EXPECT({",", "}"}));
case 3:
ACCEPT_TOKEN(ts_sym_comma);
case 4:
case 3:
ACCEPT_TOKEN(ts_sym_right_brace);
case 5:
case 4:
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(4);
LEX_ERROR(1, EXPECT({"}"}));
case 6:
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(3);
LEX_ERROR();
case 5:
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(2);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(7);
LEX_ERROR(2, EXPECT({",", "]"}));
case 7:
ADVANCE(6);
LEX_ERROR();
case 6:
ACCEPT_TOKEN(ts_sym_right_bracket);
case 8:
case 7:
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(7);
LEX_ERROR(1, EXPECT({"]"}));
case 9:
ADVANCE(6);
LEX_ERROR();
case 8:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(10);
ADVANCE(9);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(16);
ADVANCE(15);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(17);
ADVANCE(16);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(18);
LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"}));
ADVANCE(17);
LEX_ERROR();
case 9:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(12);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(14);
LEX_ERROR();
case 10:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
ADVANCE(12);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
LEX_ERROR(1, EXPECT({"<ANY>"}));
ADVANCE(14);
LEX_ERROR();
case 11:
ACCEPT_TOKEN(ts_sym_string);
case 12:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(12);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
LEX_ERROR(1, EXPECT({"<ANY>"}));
case 12:
ACCEPT_TOKEN(ts_sym_string);
ADVANCE(14);
LEX_ERROR();
case 13:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(14);
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
LEX_ERROR(1, EXPECT({"<ANY>"}));
case 14:
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(11);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(13);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
ADVANCE(14);
ACCEPT_TOKEN(ts_sym_string);
case 15:
case 14:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(11);
LEX_ERROR(1, EXPECT({"\""}));
case 16:
ADVANCE(10);
LEX_ERROR();
case 15:
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(16);
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_number);
case 17:
case 16:
ACCEPT_TOKEN(ts_sym_left_bracket);
case 18:
case 17:
ACCEPT_TOKEN(ts_sym_left_brace);
case 19:
case 18:
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(20);
LEX_ERROR(1, EXPECT({":"}));
case 20:
ADVANCE(19);
LEX_ERROR();
case 19:
ACCEPT_TOKEN(ts_sym_colon);
case 20:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(9);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(3);
LEX_ERROR();
case 21:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(10);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(4);
LEX_ERROR(2, EXPECT({"\"", "}"}));
ADVANCE(9);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(16);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(6);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(17);
LEX_ERROR();
case 22:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(10);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(16);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(17);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(7);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(18);
LEX_ERROR(5, EXPECT({"\"", "0-9", "[", "]", "{"}));
ADVANCE(9);
LEX_ERROR();
case 23:
ACCEPT_TOKEN(ts_sym_comma);
case 24:
ACCEPT_TOKEN(ts_sym_colon);
case 25:
ACCEPT_TOKEN(ts_sym_left_bracket);
case 26:
ACCEPT_TOKEN(ts_sym_right_bracket);
case 27:
ACCEPT_TOKEN(ts_sym_left_brace);
case 28:
ACCEPT_TOKEN(ts_sym_right_brace);
case ts_lex_state_error:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(10);
LEX_ERROR(1, EXPECT({"\""}));
ADVANCE(9);
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(23);
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(15);
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(24);
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(25);
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(26);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(27);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(28);
LEX_ERROR();
default:
LEX_PANIC();
}
@ -177,7 +203,7 @@ PARSE_FN() {
START_PARSER();
switch (PARSE_STATE()) {
case 0:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(1);
@ -194,7 +220,7 @@ PARSE_FN() {
case ts_sym_left_bracket:
SHIFT(44);
default:
PARSE_PANIC();
PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket}));
}
case 1:
SET_LEX_STATE(0);
@ -202,7 +228,7 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_value, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 2:
SET_LEX_STATE(0);
@ -210,28 +236,28 @@ PARSE_FN() {
case ts_aux_sym_end:
ACCEPT_INPUT();
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 3:
SET_LEX_STATE(21);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_sym_string:
SHIFT(4);
case ts_sym_right_brace:
SHIFT(43);
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_string, ts_sym_right_brace}));
}
case 4:
SET_LEX_STATE(19);
SET_LEX_STATE(18);
switch (LOOKAHEAD_SYM()) {
case ts_sym_colon:
SHIFT(5);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_colon}));
}
case 5:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(6);
@ -248,20 +274,20 @@ PARSE_FN() {
case ts_sym_left_bracket:
SHIFT(19);
default:
PARSE_PANIC();
PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket}));
}
case 6:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_value, 1, COLLAPSE({0}));
case ts_sym_right_brace:
REDUCE(ts_sym_value, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace}));
}
case 7:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(8);
@ -270,26 +296,26 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper2:
SHIFT(41);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2}));
}
case 8:
SET_LEX_STATE(23);
SET_LEX_STATE(22);
switch (LOOKAHEAD_SYM()) {
case ts_sym_string:
SHIFT(9);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_string}));
}
case 9:
SET_LEX_STATE(19);
SET_LEX_STATE(18);
switch (LOOKAHEAD_SYM()) {
case ts_sym_colon:
SHIFT(10);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_colon}));
}
case 10:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(6);
@ -306,10 +332,10 @@ PARSE_FN() {
case ts_sym_left_bracket:
SHIFT(19);
default:
PARSE_PANIC();
PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket}));
}
case 11:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(8);
@ -318,36 +344,36 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper2:
SHIFT(12);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2}));
}
case 12:
SET_LEX_STATE(5);
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_brace:
REDUCE(ts_aux_sym_repeat_helper2, 5, COLLAPSE({1, 0, 1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_brace}));
}
case 13:
SET_LEX_STATE(21);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_sym_string:
SHIFT(14);
case ts_sym_right_brace:
SHIFT(40);
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_string, ts_sym_right_brace}));
}
case 14:
SET_LEX_STATE(19);
SET_LEX_STATE(18);
switch (LOOKAHEAD_SYM()) {
case ts_sym_colon:
SHIFT(15);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_colon}));
}
case 15:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(6);
@ -364,10 +390,10 @@ PARSE_FN() {
case ts_sym_left_bracket:
SHIFT(19);
default:
PARSE_PANIC();
PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket}));
}
case 16:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(8);
@ -376,28 +402,28 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper2:
SHIFT(17);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2}));
}
case 17:
SET_LEX_STATE(5);
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_brace:
SHIFT(18);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_brace}));
}
case 18:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1}));
case ts_sym_right_brace:
REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace}));
}
case 19:
SET_LEX_STATE(22);
SET_LEX_STATE(21);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(20);
@ -416,20 +442,20 @@ PARSE_FN() {
case ts_sym_right_bracket:
SHIFT(39);
default:
PARSE_PANIC();
PARSE_ERROR(8, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket, ts_sym_right_bracket}));
}
case 20:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_value, 1, COLLAPSE({0}));
case ts_sym_right_bracket:
REDUCE(ts_sym_value, 1, COLLAPSE({0}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket}));
}
case 21:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(22);
@ -438,10 +464,10 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper1:
SHIFT(37);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1}));
}
case 22:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(20);
@ -458,10 +484,10 @@ PARSE_FN() {
case ts_sym_left_bracket:
SHIFT(32);
default:
PARSE_PANIC();
PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket}));
}
case 23:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(22);
@ -470,36 +496,36 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper1:
SHIFT(24);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1}));
}
case 24:
SET_LEX_STATE(8);
SET_LEX_STATE(7);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_bracket:
REDUCE(ts_aux_sym_repeat_helper1, 3, COLLAPSE({1, 0, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_bracket}));
}
case 25:
SET_LEX_STATE(21);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_sym_string:
SHIFT(26);
case ts_sym_right_brace:
SHIFT(31);
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_string, ts_sym_right_brace}));
}
case 26:
SET_LEX_STATE(19);
SET_LEX_STATE(18);
switch (LOOKAHEAD_SYM()) {
case ts_sym_colon:
SHIFT(27);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_colon}));
}
case 27:
SET_LEX_STATE(9);
SET_LEX_STATE(8);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(6);
@ -516,10 +542,10 @@ PARSE_FN() {
case ts_sym_left_bracket:
SHIFT(19);
default:
PARSE_PANIC();
PARSE_ERROR(7, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket}));
}
case 28:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(8);
@ -528,38 +554,38 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper2:
SHIFT(29);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_brace, ts_aux_sym_repeat_helper2}));
}
case 29:
SET_LEX_STATE(5);
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_brace:
SHIFT(30);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_brace}));
}
case 30:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1}));
case ts_sym_right_bracket:
REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket}));
}
case 31:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_object, 2, COLLAPSE({1, 1}));
case ts_sym_right_bracket:
REDUCE(ts_sym_object, 2, COLLAPSE({1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket}));
}
case 32:
SET_LEX_STATE(22);
SET_LEX_STATE(21);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(20);
@ -578,10 +604,10 @@ PARSE_FN() {
case ts_sym_right_bracket:
SHIFT(36);
default:
PARSE_PANIC();
PARSE_ERROR(8, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket, ts_sym_right_bracket}));
}
case 33:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(22);
@ -590,81 +616,81 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper1:
SHIFT(34);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1}));
}
case 34:
SET_LEX_STATE(8);
SET_LEX_STATE(7);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_bracket:
SHIFT(35);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_bracket}));
}
case 35:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1}));
case ts_sym_right_bracket:
REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket}));
}
case 36:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_array, 2, COLLAPSE({1, 1}));
case ts_sym_right_bracket:
REDUCE(ts_sym_array, 2, COLLAPSE({1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_bracket}));
}
case 37:
SET_LEX_STATE(8);
SET_LEX_STATE(7);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_bracket:
SHIFT(38);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_bracket}));
}
case 38:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1}));
case ts_sym_right_brace:
REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace}));
}
case 39:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_array, 2, COLLAPSE({1, 1}));
case ts_sym_right_brace:
REDUCE(ts_sym_array, 2, COLLAPSE({1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace}));
}
case 40:
SET_LEX_STATE(2);
SET_LEX_STATE(1);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
REDUCE(ts_sym_object, 2, COLLAPSE({1, 1}));
case ts_sym_right_brace:
REDUCE(ts_sym_object, 2, COLLAPSE({1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(2, EXPECT({ts_sym_comma, ts_sym_right_brace}));
}
case 41:
SET_LEX_STATE(5);
SET_LEX_STATE(4);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_brace:
SHIFT(42);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_brace}));
}
case 42:
SET_LEX_STATE(0);
@ -672,7 +698,7 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_object, 6, COLLAPSE({1, 0, 1, 0, 1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 43:
SET_LEX_STATE(0);
@ -680,10 +706,10 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_object, 2, COLLAPSE({1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 44:
SET_LEX_STATE(22);
SET_LEX_STATE(21);
switch (LOOKAHEAD_SYM()) {
case ts_sym_array:
SHIFT(20);
@ -702,10 +728,10 @@ PARSE_FN() {
case ts_sym_right_bracket:
SHIFT(48);
default:
PARSE_PANIC();
PARSE_ERROR(8, EXPECT({ts_sym_array, ts_sym_number, ts_sym_object, ts_sym_string, ts_sym_value, ts_sym_left_brace, ts_sym_left_bracket, ts_sym_right_bracket}));
}
case 45:
SET_LEX_STATE(6);
SET_LEX_STATE(5);
switch (LOOKAHEAD_SYM()) {
case ts_sym_comma:
SHIFT(22);
@ -714,15 +740,15 @@ PARSE_FN() {
case ts_aux_sym_repeat_helper1:
SHIFT(46);
default:
PARSE_PANIC();
PARSE_ERROR(3, EXPECT({ts_sym_comma, ts_sym_right_bracket, ts_aux_sym_repeat_helper1}));
}
case 46:
SET_LEX_STATE(8);
SET_LEX_STATE(7);
switch (LOOKAHEAD_SYM()) {
case ts_sym_right_bracket:
SHIFT(47);
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_sym_right_bracket}));
}
case 47:
SET_LEX_STATE(0);
@ -730,7 +756,7 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_array, 4, COLLAPSE({1, 0, 1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
case 48:
SET_LEX_STATE(0);
@ -738,7 +764,7 @@ PARSE_FN() {
case ts_aux_sym_end:
REDUCE(ts_sym_array, 2, COLLAPSE({1, 1}));
default:
PARSE_PANIC();
PARSE_ERROR(1, EXPECT({ts_aux_sym_end}));
}
default:
PARSE_PANIC();

View file

@ -12,7 +12,7 @@ extern "C" {
//#define TS_DEBUG_PARSE
//#define TS_DEBUG_LEX
#ifdef TS_DEBUG_LEX
#define DEBUG_LEX(...) fprintf(stderr, __VA_ARGS__)
#else
@ -27,8 +27,9 @@ extern "C" {
static int INITIAL_STACK_SIZE = 100;
static const char *ts_symbol_names[];
typedef int ts_state;
static const ts_state ts_lex_state_error = -1;
typedef struct {
ts_state state;
@ -37,15 +38,18 @@ typedef struct {
typedef struct {
const char *input;
int error_mode;
size_t position;
ts_tree *lookahead_node;
ts_tree *prev_lookahead_node;
ts_state lex_state;
ts_stack_entry *stack;
size_t stack_size;
ts_parse_result result;
ts_tree *result;
} ts_parser;
static void ts_lex(ts_parser *parser);
static ts_parser ts_parser_make(const char *input) {
ts_parser result = {
.input = input,
@ -54,13 +58,7 @@ static ts_parser ts_parser_make(const char *input) {
.lex_state = 0,
.stack = calloc(INITIAL_STACK_SIZE, sizeof(ts_stack_entry)),
.stack_size = 0,
.result = {
.tree = NULL,
.error = {
.expected_inputs = NULL,
.expected_input_count = 0
},
},
.result = NULL,
};
return result;
}
@ -69,9 +67,9 @@ static char ts_parser_lookahead_char(const ts_parser *parser) {
return parser->input[parser->position];
}
static long ts_parser_lookahead_sym(const ts_parser *parser) {
static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) {
ts_tree *node = parser->lookahead_node;
return node ? node->value : -1;
return node ? node->symbol : ts_symbol_error;
}
static ts_state ts_parser_parse_state(const ts_parser *parser) {
@ -96,7 +94,7 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
for (int i = 0; i < immediate_child_count; i++) {
ts_tree *child = parser->stack[parser->stack_size + i].node;
if (collapse_flags[i]) {
total_child_count += child->child_count;
total_child_count += ts_tree_child_count(child);
} else {
total_child_count++;
}
@ -107,8 +105,11 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
for (int i = 0; i < immediate_child_count; i++) {
ts_tree *child = parser->stack[parser->stack_size + i].node;
if (collapse_flags[i]) {
memcpy(children + n, child->children, (child->child_count * sizeof(ts_tree *)));
n += child->child_count;
size_t grandchild_count = ts_tree_child_count(child);
if (grandchild_count > 0) {
memcpy(children + n, ts_tree_children(child), (grandchild_count * sizeof(ts_tree *)));
n += grandchild_count;
}
} else {
children[n] = child;
n++;
@ -116,22 +117,10 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
}
parser->prev_lookahead_node = parser->lookahead_node;
parser->lookahead_node = ts_tree_make(symbol, total_child_count, children);
parser->lookahead_node = ts_tree_make_node(symbol, total_child_count, children);
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
}
static void ts_parser_set_error(ts_parser *parser, size_t count, const char **expected_inputs) {
ts_error *error = &parser->result.error;
error->position = parser->position;
error->lookahead_char = ts_parser_lookahead_char(parser);
error->expected_input_count = count;
error->expected_inputs = expected_inputs;
}
static int ts_parser_has_error(const ts_parser *parser) {
return (parser->result.error.expected_inputs != NULL);
}
static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
DEBUG_LEX("character: '%c' \n", ts_parser_lookahead_char(parser));
parser->position++;
@ -140,18 +129,28 @@ static void ts_parser_advance(ts_parser *parser, ts_state lex_state) {
static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
parser->lookahead_node = ts_tree_make(symbol, 0, NULL);
parser->lookahead_node = ts_tree_make_leaf(symbol);
}
static void ts_parser_accept_input(ts_parser *parser) {
parser->result.tree = parser->stack[parser->stack_size - 1].node;
static ts_tree * ts_parser_tree(ts_parser *parser) {
DEBUG_PARSE("accept \n");
return parser->stack[0].node;
}
static void ts_parser_skip_whitespace(ts_parser *parser) {
while (isspace(parser->input[parser->position]))
while (isspace(ts_parser_lookahead_char(parser)))
parser->position++;
}
/*
 * Reacts to a parse error in the current state.
 *
 * First error (error_mode not yet set): flags the parser as being in error
 * mode and replaces the lookahead node with an error node that records the
 * current lookahead character and the symbols the parse state expected.
 *
 * Later errors (error_mode already set): switches the lexer to
 * ts_lex_state_error and lexes again.
 */
static void ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {
    if (!parser->error_mode) {
        parser->error_mode = 1;
        char unexpected_char = ts_parser_lookahead_char(parser);
        parser->lookahead_node = ts_tree_make_error(unexpected_char, count, expected_symbols);
        return;
    }

    parser->lex_state = ts_lex_state_error;
    ts_lex(parser);
}
#pragma mark - DSL
@ -159,7 +158,7 @@ static void ts_parser_skip_whitespace(ts_parser *parser) {
static void ts_lex(ts_parser *parser)
#define PARSE_FN() \
static ts_parse_result ts_parse(const char *input)
static const ts_tree * ts_parse(const char *input)
#define SYMBOL_NAMES \
static const char *ts_symbol_names[] =
@ -175,6 +174,9 @@ ts_parser p = ts_parser_make(input), *parser = &p; \
next_state:
#define START_LEXER() \
if (ts_parser_lookahead_char(parser) == '\0') { \
ACCEPT_TOKEN(ts_aux_sym_end) \
} \
ts_parser_skip_whitespace(parser); \
next_state:
@ -194,7 +196,6 @@ parser->lex_state
{ \
parser->lex_state = state_index; \
if (LOOKAHEAD_SYM() < 0) ts_lex(parser); \
if (ts_parser_has_error(parser)) goto done; \
}
#define SHIFT(state) \
@ -211,30 +212,33 @@ goto next_state; \
}
#define ACCEPT_INPUT() \
{ ts_parser_accept_input(parser); goto done; }
{ goto done; }
#define ACCEPT_TOKEN(symbol) \
{ ts_parser_set_lookahead_sym(parser, symbol); goto done; }
#define LEX_ERROR(count, inputs) \
#define LEX_ERROR() \
{ ts_parser_set_lookahead_sym(parser, -1); goto done; }
#define PARSE_ERROR(count, inputs) \
{ \
static const char *expected_inputs[] = inputs; \
ts_parser_set_error(parser, count, expected_inputs); \
goto done; \
static const ts_symbol expected_inputs[] = inputs; \
ts_parser_handle_error(parser, count, expected_inputs); \
goto next_state; \
}
#define LEX_PANIC() \
printf("Lex error: unexpected state %ud", LEX_STATE());
printf("Lex error: unexpected state %d", LEX_STATE());
#define PARSE_PANIC() \
printf("Parse error: unexpected state %ud", PARSE_STATE());
printf("Parse error: unexpected state %d", PARSE_STATE());
#define EXPECT(...) __VA_ARGS__
#define COLLAPSE(...) __VA_ARGS__
#define FINISH_PARSER() \
done: \
return parser->result;
return ts_parser_tree(parser);
#define FINISH_LEXER() \
done:

View file

@ -7,36 +7,37 @@ extern "C" {
#include <stdlib.h>
typedef struct {
size_t position;
char lookahead_char;
size_t expected_input_count;
const char **expected_inputs;
} ts_error;
const char * ts_error_string(const ts_error *error);
typedef size_t ts_symbol;
typedef int ts_symbol;
extern const ts_symbol ts_symbol_error;
typedef struct ts_tree {
ts_symbol value;
struct ts_tree **children;
size_t child_count;
ts_symbol symbol;
size_t ref_count;
union {
struct {
size_t count;
struct ts_tree **contents;
} children;
struct {
char lookahead_char;
size_t expected_input_count;
const ts_symbol *expected_inputs;
} error;
} data;
} ts_tree;
ts_tree * ts_tree_make(ts_symbol value, size_t child_count, ts_tree **children);
ts_tree * ts_tree_make_leaf(ts_symbol symbol);
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children);
ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs);
void ts_tree_retain(ts_tree *tree);
void ts_tree_release(ts_tree *tree);
int ts_tree_equals(const ts_tree *tree1, const ts_tree *tree2);
char * ts_tree_string(const ts_tree *tree, const char **names);
char * ts_tree_error_string(const ts_tree *tree, const char **names);
size_t ts_tree_child_count(const ts_tree *tree);
ts_tree ** ts_tree_children(const ts_tree *tree);
typedef struct {
ts_error error;
ts_tree *tree;
} ts_parse_result;
typedef ts_parse_result ts_parse_fn(const char *);
typedef const ts_tree * ts_parse_fn(const char *);
typedef struct {
ts_parse_fn *parse_fn;
@ -49,7 +50,7 @@ ts_document * ts_document_make();
void ts_document_free(ts_document *);
void ts_document_set_parser(ts_document *document, ts_parse_config config);
void ts_document_set_text(ts_document *document, const char *text);
ts_tree * ts_document_tree(const ts_document *document);
const ts_tree * ts_document_tree(const ts_document *document);
const char * ts_document_string(const ts_document *document);
#ifdef __cplusplus

View file

@ -59,7 +59,7 @@ describe("building parse and lex tables", []() {
};
function<LexState(size_t)> lex_state = [&](size_t parse_state_index) {
long index = table.states[parse_state_index].lex_state_index;
long index = table.states[parse_state_index].lex_state_id;
return lex_table.states[index];
};

View file

@ -5,48 +5,48 @@ extern ts_parse_config ts_parse_config_json;
START_TEST
describe("json", []() {
ts_document *document;
ts_document *doc;
before_each([&]() {
document = ts_document_make();
ts_document_set_parser(document, ts_parse_config_json);
doc = ts_document_make();
ts_document_set_parser(doc, ts_parse_config_json);
});
after_each([&]() {
ts_document_free(document);
ts_document_free(doc);
});
it("parses strings", [&]() {
ts_document_set_text(document, "\"\"");
AssertThat(string(ts_document_string(document)), Equals("(value (string))"));
ts_document_set_text(doc, "\"\"");
AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
ts_document_set_text(document, "\"simple-string\"");
AssertThat(string(ts_document_string(document)), Equals("(value (string))"));
ts_document_set_text(doc, "\"simple-string\"");
AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
ts_document_set_text(document, "\"this is a \\\"string\\\" within a string\"");
AssertThat(string(ts_document_string(document)), Equals("(value (string))"));
ts_document_set_text(doc, "\"this is a \\\"string\\\" within a string\"");
AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
});
it("parses objects", [&]() {
ts_document_set_text(document, "{}");
AssertThat(string(ts_document_string(document)), Equals("(value (object))"));
ts_document_set_text(doc, "{}");
AssertThat(string(ts_document_string(doc)), Equals("(value (object))"));
ts_document_set_text(document, "{ \"key1\": 1 }");
AssertThat(string(ts_document_string(document)), Equals("(value (object (string) (value (number))))"));
ts_document_set_text(doc, "{ \"key1\": 1 }");
AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number))))"));
ts_document_set_text(document, "{\"key1\": 1, \"key2\": 2 }");
AssertThat(string(ts_document_string(document)), Equals("(value (object (string) (value (number)) (string) (value (number))))"));
ts_document_set_text(doc, "{\"key1\": 1, \"key2\": 2 }");
AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number)) (string) (value (number))))"));
});
it("parses arrays", [&]() {
ts_document_set_text(document, "[]");
AssertThat(string(ts_document_string(document)), Equals("(value (array))"));
ts_document_set_text(doc, "[]");
AssertThat(string(ts_document_string(doc)), Equals("(value (array))"));
ts_document_set_text(document, "[5]");
AssertThat(string(ts_document_string(document)), Equals("(value (array (value (number))))"));
ts_document_set_text(doc, "[5]");
AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number))))"));
ts_document_set_text(document, "[1, 2, 3]");
AssertThat(string(ts_document_string(document)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
ts_document_set_text(doc, "[1, 2, 3]");
AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
});
});

View file

@ -9,8 +9,8 @@ describe("trees", []() {
ts_tree *tree1, *parent1;
before_each([&]() {
tree1 = ts_tree_make(cat, 0, NULL);
parent1 = ts_tree_make(dog, 1, &tree1);
tree1 = ts_tree_make_leaf(cat);
parent1 = ts_tree_make_node(dog, 1, &tree1);
});
after_each([&]() {
@ -20,10 +20,10 @@ describe("trees", []() {
describe("equality", [&]() {
it("returns true for identical trees", [&]() {
ts_tree *tree2 = ts_tree_make(cat, 0, NULL);
ts_tree *tree2 = ts_tree_make_leaf(cat);
AssertThat(ts_tree_equals(tree1, tree2), Equals(1));
ts_tree *parent2 = ts_tree_make(dog, 1, &tree2);
ts_tree *parent2 = ts_tree_make_node(dog, 1, &tree2);
AssertThat(ts_tree_equals(parent1, parent2), Equals(1));
ts_tree_release(tree2);
@ -31,13 +31,13 @@ describe("trees", []() {
});
it("returns false for different trees", [&]() {
ts_tree *different_tree = ts_tree_make(pig, 0, NULL);
ts_tree *different_tree = ts_tree_make_leaf(pig);
AssertThat(ts_tree_equals(tree1, different_tree), Equals(0));
ts_tree *different_parent = ts_tree_make(dog, 1, &different_tree);
ts_tree *different_parent = ts_tree_make_node(dog, 1, &different_tree);
AssertThat(ts_tree_equals(parent1, different_parent), Equals(0));
ts_tree *parent_with_same_type = ts_tree_make(cat, 1, &different_parent);
ts_tree *parent_with_same_type = ts_tree_make_node(cat, 1, &different_parent);
AssertThat(ts_tree_equals(parent_with_same_type, tree1), Equals(0));
AssertThat(ts_tree_equals(tree1, parent_with_same_type), Equals(0));

View file

@ -14,105 +14,111 @@ namespace tree_sitter {
using rules::CharacterSet;
namespace build_tables {
static int NOT_FOUND = -1;
static int NOT_FOUND = -2;
static Symbol START("start", rules::SymbolTypeAuxiliary);
static Symbol END_OF_INPUT("end", rules::SymbolTypeAuxiliary);
class TableBuilder {
const PreparedGrammar grammar;
const PreparedGrammar lex_grammar;
map<const ParseItemSet, size_t> parse_state_indices;
map<const LexItemSet, size_t> lex_state_indices;
map<const ParseItemSet, ParseStateId> parse_state_ids;
map<const LexItemSet, LexStateId> lex_state_ids;
ParseTable parse_table;
LexTable lex_table;
long parse_state_index_for_item_set(const ParseItemSet &item_set) const {
auto entry = parse_state_indices.find(item_set);
return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second;
long parse_state_id_for_item_set(const ParseItemSet &item_set) const {
auto entry = parse_state_ids.find(item_set);
return (entry == parse_state_ids.end()) ? NOT_FOUND : entry->second;
}
long lex_state_index_for_item_set(const LexItemSet &item_set) const {
auto entry = lex_state_indices.find(item_set);
return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second;
long lex_state_id_for_item_set(const LexItemSet &item_set) const {
auto entry = lex_state_ids.find(item_set);
return (entry == lex_state_ids.end()) ? NOT_FOUND : entry->second;
}
void add_shift_actions(const ParseItemSet &item_set, size_t state_index) {
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (auto transition : sym_transitions(item_set, grammar)) {
Symbol symbol = transition.first;
ParseItemSet item_set = transition.second;
size_t new_state_index = add_parse_state(item_set);
parse_table.add_action(state_index, symbol, ParseAction::Shift(new_state_index));
ParseStateId new_state_id = add_parse_state(item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id));
}
}
void add_advance_actions(const LexItemSet &item_set, size_t state_index) {
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (auto transition : char_transitions(item_set, grammar)) {
CharacterSet rule = transition.first;
LexItemSet item_set = transition.second;
size_t new_state_index = add_lex_state(item_set);
lex_table.add_action(state_index, rule, LexAction::Advance(new_state_index));
LexStateId new_state_id = add_lex_state(item_set);
lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
}
}
void add_accept_token_actions(const LexItemSet &item_set, size_t state_index) {
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
for (LexItem item : item_set) {
if (item.is_done()) {
lex_table.add_default_action(state_index, LexAction::Accept(item.lhs));
lex_table.add_default_action(state_id, LexAction::Accept(item.lhs));
}
}
}
void add_reduce_actions(const ParseItemSet &item_set, size_t state_index) {
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs == START) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbols);
parse_table.add_action(state_index, item.lookahead_sym, action);
parse_table.add_action(state_id, item.lookahead_sym, action);
}
}
}
void assign_lex_state(size_t state_index) {
ParseState &state = parse_table.states[state_index];
void assign_lex_state(ParseStateId state_id) {
ParseState &state = parse_table.states[state_id];
LexItemSet item_set;
for (auto &symbol : state.expected_inputs()) {
if (symbol == END_OF_INPUT)
item_set.insert(LexItem(symbol, make_shared<CharacterSet>(std::set<rules::CharacterRange>{ '\0' })));
if (lex_grammar.has_definition(symbol))
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
}
state.lex_state_index = add_lex_state(item_set);
state.lex_state_id = add_lex_state(item_set);
}
size_t add_lex_state(const LexItemSet &item_set) {
auto state_index = lex_state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = lex_table.add_state();
lex_state_indices[item_set] = state_index;
add_advance_actions(item_set, state_index);
add_accept_token_actions(item_set, state_index);
LexStateId add_lex_state(const LexItemSet &item_set) {
auto state_id = lex_state_id_for_item_set(item_set);
if (state_id == NOT_FOUND) {
state_id = lex_table.add_state();
lex_state_ids[item_set] = state_id;
add_advance_actions(item_set, state_id);
add_accept_token_actions(item_set, state_id);
}
return state_index;
return state_id;
}
size_t add_parse_state(const ParseItemSet &item_set) {
auto state_index = parse_state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = parse_table.add_state();
parse_state_indices[item_set] = state_index;
ParseStateId add_parse_state(const ParseItemSet &item_set) {
auto state_id = parse_state_id_for_item_set(item_set);
if (state_id == NOT_FOUND) {
state_id = parse_table.add_state();
parse_state_ids[item_set] = state_id;
add_shift_actions(item_set, state_index);
add_reduce_actions(item_set, state_index);
assign_lex_state(state_index);
add_shift_actions(item_set, state_id);
add_reduce_actions(item_set, state_id);
assign_lex_state(state_id);
}
return state_index;
return state_id;
}
// Populates the lex table's dedicated error state. Builds an item set holding
// one LexItem for every rule in the lex grammar, then registers the advance and
// accept-token actions for that set under LexTable::ERROR_STATE_ID.
void add_error_lex_state() {
LexItemSet error_item_set;
// Every lexical rule takes part, regardless of which parse state is active.
for (auto &pair : lex_grammar.rules)
error_item_set.insert(LexItem(pair.first, pair.second));
add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
}
// void dump_item_sets() {
// std::vector<const ParseItemSet *> item_sets(parse_state_indices.size());
// for (auto &pair : parse_state_indices)
// std::vector<const ParseItemSet *> item_sets(parse_state_ids.size());
// for (auto &pair : parse_state_ids)
// item_sets[pair.second] = &pair.first;
//
// for (int i = 0; i < item_sets.size(); i++) {
@ -135,6 +141,7 @@ namespace tree_sitter {
auto item = ParseItem(START, make_shared<Symbol>(grammar.start_rule_name), {}, END_OF_INPUT);
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
add_parse_state(item_set);
add_error_lex_state();
return pair<ParseTable, LexTable>(parse_table, lex_table);
}
};

View file

@ -164,17 +164,13 @@ namespace tree_sitter {
return input;
}
string lex_error_call(const set<rules::CharacterSet> &expected_inputs) {
rules::CharacterSet expected_set;
for (auto &rule : expected_inputs)
expected_set.add_set(rule);
string result = "LEX_ERROR(" + to_string(expected_set.ranges.size()) + ", EXPECT({";
string parse_error_call(const set<rules::Symbol> &expected_inputs) {
string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
bool started = false;
for (auto &range : expected_set.ranges) {
for (auto &symbol : expected_inputs) {
if (started) result += ", ";
started = true;
result += "\"" + escape_string(range.to_string()) + "\"";
result += symbol_id(symbol);
}
result += "}));";
return result;
@ -183,7 +179,7 @@ namespace tree_sitter {
string code_for_lex_actions(const set<LexAction> &actions, const set<rules::CharacterSet> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
return lex_error_call(expected_inputs);
return "LEX_ERROR();";
} else {
switch (action->type) {
case LexActionTypeAdvance:
@ -198,11 +194,12 @@ namespace tree_sitter {
string code_for_parse_state(const ParseState &parse_state) {
string body = "";
auto expected_inputs = parse_state.expected_inputs();
for (auto pair : parse_state.actions)
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second, parse_state.expected_inputs()));
body += _default("PARSE_PANIC();");
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second, expected_inputs));
body += _default(parse_error_call(expected_inputs));
return
string("SET_LEX_STATE(") + to_string(parse_state.lex_state_index) + ");\n" +
string("SET_LEX_STATE(") + to_string(parse_state.lex_state_id) + ");\n" +
_switch("LOOKAHEAD_SYM()", body);
}
@ -227,6 +224,7 @@ namespace tree_sitter {
string body = "";
for (int i = 0; i < lex_table.states.size(); i++)
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
body += _default("LEX_PANIC();");
return _switch("LEX_STATE()", body);
}

View file

@ -58,16 +58,25 @@ namespace tree_sitter {
return result;
}
size_t LexTable::add_state() {
LexStateId LexTable::add_state() {
states.push_back(LexState());
return states.size() - 1;
}
void LexTable::add_action(size_t state_index, CharacterSet match, LexAction action) {
states[state_index].actions[match].insert(action);
// Resolves a lex state id to the state object it names: a negative id selects
// the table's separate error_state, any other id indexes the states vector.
LexState & state(LexTable *table, LexStateId id) {
    return (id < 0) ? table->error_state : table->states[id];
}
void LexTable::add_default_action(size_t state_index, LexAction action) {
states[state_index].default_actions.insert(action);
void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) {
state(this, id).actions[match].insert(action);
}
void LexTable::add_default_action(LexStateId id, LexAction action) {
state(this, id).default_actions.insert(action);
}
const LexStateId LexTable::ERROR_STATE_ID = -1;
}

View file

@ -51,13 +51,17 @@ namespace tree_sitter {
std::set<rules::CharacterSet> expected_inputs() const;
};
typedef long int LexStateId;
class LexTable {
public:
size_t add_state();
void add_action(size_t state_index, rules::CharacterSet rule, LexAction action);
void add_default_action(size_t state_index, LexAction action);
static const LexStateId ERROR_STATE_ID;
LexStateId add_state();
void add_action(LexStateId state_id, rules::CharacterSet rule, LexAction action);
void add_default_action(LexStateId state_id, LexAction action);
std::vector<LexState> states;
LexState error_state;
};
}

View file

@ -58,7 +58,7 @@ namespace tree_sitter {
}
}
ParseState::ParseState() : lex_state_index(-1) {}
ParseState::ParseState() : lex_state_id(-1) {}
set<Symbol> ParseState::expected_inputs() const {
set<Symbol> result;
@ -86,13 +86,13 @@ namespace tree_sitter {
return stream;
}
size_t ParseTable::add_state() {
ParseStateId ParseTable::add_state() {
states.push_back(ParseState());
return states.size() - 1;
}
void ParseTable::add_action(size_t state_index, Symbol symbol, ParseAction action) {
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
symbols.insert(symbol);
states[state_index].actions[symbol].insert(action);
states[id].actions[symbol].insert(action);
}
}

View file

@ -5,6 +5,7 @@
#include <vector>
#include <set>
#include "rules/symbol.h"
#include "./lex_table.h"
namespace tree_sitter {
typedef enum {
@ -52,15 +53,17 @@ namespace tree_sitter {
ParseState();
std::map<rules::Symbol, std::set<ParseAction>> actions;
std::set<rules::Symbol> expected_inputs() const;
size_t lex_state_index;
LexStateId lex_state_id;
};
typedef unsigned long int ParseStateId;
std::ostream& operator<<(std::ostream &stream, const ParseState &state);
class ParseTable {
public:
size_t add_state();
void add_action(size_t state_index, rules::Symbol symbol, ParseAction action);
void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action);
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;

View file

@ -3,8 +3,9 @@
struct ts_document {
ts_parse_fn *parse_fn;
const char **symbol_names;
ts_error error;
ts_tree *tree;
const ts_tree *tree;
size_t error_count;
ts_tree **errors;
};
ts_document * ts_document_make() {
@ -21,18 +22,18 @@ void ts_document_set_parser(ts_document *document, ts_parse_config config) {
}
void ts_document_set_text(ts_document *document, const char *text) {
ts_parse_result result = document->parse_fn(text);
document->tree = result.tree;
document->error = result.error;
const ts_tree * result = document->parse_fn(text);
document->tree = result;
document->errors = NULL;
}
ts_tree * ts_document_tree(const ts_document *document) {
const ts_tree * ts_document_tree(const ts_document *document) {
return document->tree;
}
const char * ts_document_string(const ts_document *document) {
if (document->error.expected_inputs != NULL) {
return ts_error_string(&document->error);
if (document->error_count > 0) {
return ts_tree_error_string(document->errors[0], document->symbol_names);
} else {
return ts_tree_string(document->tree, document->symbol_names);
}

View file

@ -1,16 +0,0 @@
#include "tree_sitter/runtime.h"
#include <string>
#include "string.h"
using std::string;
const char * ts_error_string(const ts_error *error) {
string result = string("Unexpected character '") + error->lookahead_char + "'. Expected:";
for (int i = 0; i < error->expected_input_count; i++) {
result += string(" ") + error->expected_inputs[i];
}
char *stuff = (char *)malloc(result.size() * sizeof(char));
strcpy(stuff, result.c_str());
return stuff;
}

View file

@ -3,18 +3,43 @@
#include <string.h>
using std::string;
using std::to_string;
ts_tree * ts_tree_make(ts_symbol value, size_t child_count, ts_tree **children) {
const ts_symbol ts_symbol_error = -1;
ts_tree * ts_tree_make_leaf(ts_symbol symbol) {
ts_tree *result = new ts_tree();
result->value = value;
result->child_count = child_count;
result->children = children;
result->ref_count = 0;
result->symbol = symbol;
result->data.children = {
.count = 0,
.contents = NULL
};
return result;
}
// Creates an interior node for `symbol` that refers to the given children.
//
// The `children` array is stored as-is (not copied), and each child is
// retained once. The new node starts with a ref_count of zero.
//
// The node is allocated with calloc because ts_tree_release frees trees with
// free(); pairing free() with `new` is undefined behaviour.
//
// Returns NULL if the allocation fails.
ts_tree * ts_tree_make_node(ts_symbol symbol, size_t child_count, ts_tree **children) {
    ts_tree *result = (ts_tree *)calloc(1, sizeof(ts_tree));
    if (!result) return NULL;

    result->ref_count = 0;
    result->symbol = symbol;
    result->data.children.count = child_count;
    result->data.children.contents = children;

    // size_t index: child_count is unsigned, so avoid a signed/unsigned compare.
    for (size_t i = 0; i < child_count; i++)
        ts_tree_retain(children[i]);
    return result;
}
// Creates an error node describing an unexpected character.
//
// lookahead_char        - the character that could not be handled
// expected_input_count  - number of entries in expected_inputs
// expected_inputs       - symbols that would have been accepted; the pointer
//                         is stored as-is (not copied)
//
// The node's symbol is set to ts_symbol_error. ts_tree_children,
// ts_tree_child_count and ts_tree_equals test that symbol to decide whether
// the union holds error data or children, so it must be set here.
//
// The node is allocated with calloc because ts_tree_release frees trees with
// free(); pairing free() with `new` is undefined behaviour.
//
// Returns NULL if the allocation fails.
ts_tree * ts_tree_make_error(char lookahead_char, size_t expected_input_count, const ts_symbol *expected_inputs) {
    ts_tree *result = (ts_tree *)calloc(1, sizeof(ts_tree));
    if (!result) return NULL;

    result->ref_count = 0;
    result->symbol = ts_symbol_error;
    result->data.error.lookahead_char = lookahead_char;
    result->data.error.expected_input_count = expected_input_count;
    result->data.error.expected_inputs = expected_inputs;
    return result;
}
void ts_tree_retain(ts_tree *tree) {
tree->ref_count++;
}
@ -22,28 +47,46 @@ void ts_tree_retain(ts_tree *tree) {
void ts_tree_release(ts_tree *tree) {
tree->ref_count--;
if (tree->ref_count == 0) {
for (int i = 0; i < tree->child_count; i++)
ts_tree_release(tree->children[i]);
ts_tree **children = tree->data.children.contents;
for (int i = 0; i < ts_tree_child_count(tree); i++)
ts_tree_release(children[i]);
// free(children);
free(tree);
}
}
int ts_tree_equals(const ts_tree *node1, const ts_tree *node2) {
if (node1->value != node2->value) return 0;
if (node1->child_count != node2->child_count) return 0;
for (int i = 0; i < node1->child_count; i++) {
ts_tree *child1 = node1->children[i];
ts_tree *child2 = node2->children[i];
if (!ts_tree_equals(child1, child2)) return 0;
if (node1->symbol != node2->symbol) return 0;
if (node1->symbol == ts_symbol_error) {
// check error equality
} else {
if (node1->data.children.count != node2->data.children.count)
return 0;
for (int i = 0; i < node1->data.children.count; i++) {
ts_tree *child1 = node1->data.children.contents[i];
ts_tree *child2 = node2->data.children.contents[i];
if (!ts_tree_equals(child1, child2))
return 0;
}
}
return 1;
}
// Returns the node's child array, or NULL for an error node (whose union
// holds error details rather than children).
ts_tree ** ts_tree_children(const ts_tree *tree) {
    if (tree->symbol != ts_symbol_error)
        return tree->data.children.contents;
    return NULL;
}
// Returns how many children the node has; error nodes always report zero
// because their union holds error details rather than children.
size_t ts_tree_child_count(const ts_tree *tree) {
    if (tree->symbol != ts_symbol_error)
        return tree->data.children.count;
    return 0;
}
static string __tree_to_string(const ts_tree *tree, const char **symbol_names) {
if (!tree) return "#<null-tree>";
string result = string("(") + symbol_names[tree->value];
for (int i = 0; i < tree->child_count; i++)
result += " " + __tree_to_string(tree->children[i], symbol_names);
string result = string("(") + symbol_names[tree->symbol];
for (int i = 0; i < tree->data.children.count; i++)
result += " " + __tree_to_string(tree->data.children.contents[i], symbol_names);
return result + ")";
}
@ -53,3 +96,15 @@ char * ts_tree_string(const ts_tree *tree, const char **symbol_names) {
strcpy(result, value.c_str());
return result;
}
char * ts_tree_error_string(const ts_tree *tree, const char **symbol_names) {
string result = string("Unexpected character '") + tree->data.error.lookahead_char + "'. Expected:";
for (int i = 0; i < tree->data.error.expected_input_count; i++) {
ts_symbol symbol = tree->data.error.expected_inputs[i];
result += string(" ") + symbol_names[symbol];
}
char *stuff = (char *)malloc(result.size() * sizeof(char));
strcpy(stuff, result.c_str());
return stuff;
}

View file

@ -2,6 +2,11 @@ TODO
====
## batch parsing
- error handling: keep going when errors are encountered and put error nodes into the AST
- more example grammars:
- go
- javascript
- ruby
- add comments to generated C code describing the generated tokens (regexp pattern)
- fix any memory leaks
- add special lexical behavior for indentation-aware languages

View file

@ -23,7 +23,6 @@
127528B518AACB70006B682B /* rule_can_be_blank_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 127528B418AACB70006B682B /* rule_can_be_blank_spec.cpp */; };
12AB465F188BD03E00DE79DF /* follow_sets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12AB465D188BD03E00DE79DF /* follow_sets.cpp */; };
12AB4661188CB3A300DE79DF /* item_set_closure_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */; };
12BC470518822B27005AC502 /* error.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470318822A17005AC502 /* error.cpp */; };
12BC470718830BC5005AC502 /* first_set_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470618830BC5005AC502 /* first_set_spec.cpp */; };
12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; };
12E75A9A1891BF57001B8F10 /* json.c in Sources */ = {isa = PBXBuildFile; fileRef = 12E75A981891BF3B001B8F10 /* json.c */; };
@ -117,7 +116,6 @@
12AB465E188BD03E00DE79DF /* follow_sets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = follow_sets.h; sourceTree = "<group>"; };
12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_closure_spec.cpp; sourceTree = "<group>"; };
12AB4663188DCB9800DE79DF /* stream_methods.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = stream_methods.h; sourceTree = "<group>"; };
12BC470318822A17005AC502 /* error.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = error.cpp; sourceTree = "<group>"; };
12BC470618830BC5005AC502 /* first_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = first_set_spec.cpp; sourceTree = "<group>"; };
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/compiler/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; };
@ -395,7 +393,6 @@
isa = PBXGroup;
children = (
12EDCF8C187C6282005A7A07 /* document.cpp */,
12BC470318822A17005AC502 /* error.cpp */,
12FD40DE1860064C0041A84E /* tree.cpp */,
);
path = runtime;
@ -563,7 +560,6 @@
12E75A9C1891C17D001B8F10 /* json_spec.cpp in Sources */,
12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */,
12FD40C2185EEB5E0041A84E /* main.cpp in Sources */,
12BC470518822B27005AC502 /* error.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};