Separate functions for building parse and lex tables
Now, instead of adding states to the lex table as they are needed by the parse states, we iterate over the parse states after the fact and set up their corresponding lex states. This has the nice side effect that the lex states are in a more readable order.
This commit is contained in:
parent
b998bb35af
commit
1d314d71c2
15 changed files with 4535 additions and 4402 deletions
|
|
@ -58,147 +58,148 @@ LEX_FN() {
|
|||
switch (lex_state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '*')
|
||||
if (lookahead == '(')
|
||||
ADVANCE(1);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '+')
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(3);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(6);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 2:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(2);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token0);
|
||||
if (('0' <= lookahead && lookahead <= '9') ||
|
||||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(3);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 4:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(5);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(4);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case 5:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 6:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
case 7:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token0);
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
case 10:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
case 11:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(7);
|
||||
ADVANCE(11);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(8);
|
||||
ADVANCE(12);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case 8:
|
||||
case 12:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token6);
|
||||
case 9:
|
||||
case 13:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(9);
|
||||
ADVANCE(13);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR();
|
||||
case 10:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(10);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(11);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(12);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(13);
|
||||
LEX_ERROR();
|
||||
case 11:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 12:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(12);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 13:
|
||||
if (('0' <= lookahead && lookahead <= '9') ||
|
||||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 14:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
ADVANCE(5);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(14);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(11);
|
||||
ADVANCE(1);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(5);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(2);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(13);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(6);
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
ADVANCE(5);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(14);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(11);
|
||||
ADVANCE(1);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(5);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(2);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(13);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(6);
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_PANIC();
|
||||
|
|
@ -206,38 +207,38 @@ LEX_FN() {
|
|||
}
|
||||
|
||||
LEX_STATES = {
|
||||
[0] = 10,
|
||||
[1] = 0,
|
||||
[2] = 10,
|
||||
[3] = 0,
|
||||
[4] = 10,
|
||||
[5] = 0,
|
||||
[6] = 10,
|
||||
[7] = 0,
|
||||
[8] = 10,
|
||||
[9] = 0,
|
||||
[10] = 10,
|
||||
[11] = 0,
|
||||
[12] = 0,
|
||||
[13] = 10,
|
||||
[14] = 7,
|
||||
[15] = 10,
|
||||
[16] = 7,
|
||||
[17] = 10,
|
||||
[18] = 7,
|
||||
[19] = 10,
|
||||
[20] = 7,
|
||||
[21] = 10,
|
||||
[22] = 7,
|
||||
[23] = 10,
|
||||
[24] = 7,
|
||||
[25] = 7,
|
||||
[26] = 10,
|
||||
[27] = 7,
|
||||
[28] = 7,
|
||||
[29] = 9,
|
||||
[30] = 0,
|
||||
[31] = 9,
|
||||
[0] = 0,
|
||||
[1] = 4,
|
||||
[2] = 0,
|
||||
[3] = 4,
|
||||
[4] = 0,
|
||||
[5] = 4,
|
||||
[6] = 0,
|
||||
[7] = 4,
|
||||
[8] = 0,
|
||||
[9] = 4,
|
||||
[10] = 0,
|
||||
[11] = 4,
|
||||
[12] = 4,
|
||||
[13] = 0,
|
||||
[14] = 11,
|
||||
[15] = 0,
|
||||
[16] = 11,
|
||||
[17] = 0,
|
||||
[18] = 11,
|
||||
[19] = 0,
|
||||
[20] = 11,
|
||||
[21] = 0,
|
||||
[22] = 11,
|
||||
[23] = 0,
|
||||
[24] = 11,
|
||||
[25] = 11,
|
||||
[26] = 0,
|
||||
[27] = 11,
|
||||
[28] = 11,
|
||||
[29] = 13,
|
||||
[30] = 4,
|
||||
[31] = 13,
|
||||
};
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -59,230 +59,230 @@ LEX_FN() {
|
|||
switch (lex_state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(1);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(9);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(23);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '}')
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 2:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 4:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(4);
|
||||
if (lookahead == ',')
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 5:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
case 6:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(6);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
LEX_ERROR();
|
||||
case 7:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 8:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(8);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(5);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
LEX_ERROR();
|
||||
case 9:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(9);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
LEX_ERROR();
|
||||
case 10:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
ADVANCE(4);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 6:
|
||||
if (lookahead == '.')
|
||||
ADVANCE(7);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(6);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 7:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR();
|
||||
case 8:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(8);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
case 10:
|
||||
if (lookahead == 'a')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 11:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
LEX_ERROR();
|
||||
case 12:
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 13:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(14);
|
||||
if (lookahead == '\\')
|
||||
if (lookahead == 's')
|
||||
ADVANCE(13);
|
||||
LEX_ERROR();
|
||||
case 13:
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(14);
|
||||
LEX_ERROR();
|
||||
case 14:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
case 15:
|
||||
if (lookahead == '.')
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(16);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
LEX_ERROR();
|
||||
case 16:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(17);
|
||||
LEX_ERROR();
|
||||
case 17:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(17);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(18);
|
||||
LEX_ERROR();
|
||||
case 18:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
case 19:
|
||||
if (lookahead == 'a')
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(20);
|
||||
LEX_ERROR();
|
||||
case 20:
|
||||
if (lookahead == 'l')
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(21);
|
||||
LEX_ERROR();
|
||||
case 21:
|
||||
if (lookahead == 's')
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(22);
|
||||
LEX_ERROR();
|
||||
case 22:
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(23);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
case 23:
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
ACCEPT_TOKEN(ts_aux_sym_token0);
|
||||
case 24:
|
||||
if (lookahead == 'u')
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(25);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(24);
|
||||
LEX_ERROR();
|
||||
case 25:
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(26);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 26:
|
||||
if (lookahead == 'l')
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(26);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(1);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 27:
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
case 28:
|
||||
if (lookahead == 'r')
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(28);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(29);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 29:
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(30);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
case 30:
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(31);
|
||||
LEX_ERROR();
|
||||
case 31:
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
case 32:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token0);
|
||||
case 33:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(33);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(34);
|
||||
ADVANCE(30);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 34:
|
||||
case 31:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(31);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR();
|
||||
case 32:
|
||||
START_TOKEN();
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(32);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(33);
|
||||
LEX_ERROR();
|
||||
case 33:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 35:
|
||||
case 34:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(35);
|
||||
ADVANCE(34);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(1);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(9);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(35);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(23);
|
||||
LEX_ERROR();
|
||||
case 35:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 36:
|
||||
START_TOKEN();
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(36);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(29);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
ADVANCE(35);
|
||||
LEX_ERROR();
|
||||
case 37:
|
||||
START_TOKEN();
|
||||
|
|
@ -291,69 +291,70 @@ LEX_FN() {
|
|||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(37);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(35);
|
||||
LEX_ERROR();
|
||||
case 38:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
ADVANCE(25);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(38);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
ADVANCE(1);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(5);
|
||||
ADVANCE(29);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
ADVANCE(6);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(34);
|
||||
ADVANCE(33);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
ADVANCE(9);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
ADVANCE(35);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
ADVANCE(23);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
ADVANCE(25);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(38);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
ADVANCE(1);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(5);
|
||||
ADVANCE(29);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
ADVANCE(6);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(34);
|
||||
ADVANCE(33);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
ADVANCE(9);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
ADVANCE(35);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
ADVANCE(10);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
ADVANCE(15);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
ADVANCE(19);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
ADVANCE(23);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_PANIC();
|
||||
|
|
@ -361,66 +362,66 @@ LEX_FN() {
|
|||
}
|
||||
|
||||
LEX_STATES = {
|
||||
[0] = 9,
|
||||
[1] = 0,
|
||||
[2] = 0,
|
||||
[3] = 35,
|
||||
[4] = 4,
|
||||
[5] = 2,
|
||||
[6] = 0,
|
||||
[7] = 37,
|
||||
[8] = 4,
|
||||
[9] = 2,
|
||||
[10] = 33,
|
||||
[11] = 9,
|
||||
[12] = 4,
|
||||
[13] = 2,
|
||||
[14] = 4,
|
||||
[15] = 35,
|
||||
[16] = 4,
|
||||
[17] = 2,
|
||||
[18] = 4,
|
||||
[19] = 33,
|
||||
[20] = 9,
|
||||
[21] = 4,
|
||||
[22] = 2,
|
||||
[23] = 4,
|
||||
[24] = 36,
|
||||
[25] = 8,
|
||||
[26] = 6,
|
||||
[27] = 4,
|
||||
[28] = 9,
|
||||
[29] = 8,
|
||||
[30] = 6,
|
||||
[31] = 8,
|
||||
[32] = 35,
|
||||
[33] = 4,
|
||||
[34] = 2,
|
||||
[35] = 8,
|
||||
[36] = 33,
|
||||
[37] = 9,
|
||||
[38] = 4,
|
||||
[39] = 2,
|
||||
[40] = 8,
|
||||
[41] = 8,
|
||||
[42] = 36,
|
||||
[43] = 8,
|
||||
[44] = 6,
|
||||
[45] = 8,
|
||||
[46] = 8,
|
||||
[47] = 4,
|
||||
[48] = 4,
|
||||
[49] = 33,
|
||||
[50] = 9,
|
||||
[51] = 4,
|
||||
[52] = 2,
|
||||
[53] = 0,
|
||||
[54] = 0,
|
||||
[55] = 36,
|
||||
[56] = 8,
|
||||
[57] = 6,
|
||||
[58] = 0,
|
||||
[59] = 0,
|
||||
[0] = 0,
|
||||
[1] = 24,
|
||||
[2] = 24,
|
||||
[3] = 26,
|
||||
[4] = 28,
|
||||
[5] = 30,
|
||||
[6] = 24,
|
||||
[7] = 31,
|
||||
[8] = 28,
|
||||
[9] = 30,
|
||||
[10] = 32,
|
||||
[11] = 0,
|
||||
[12] = 28,
|
||||
[13] = 30,
|
||||
[14] = 28,
|
||||
[15] = 26,
|
||||
[16] = 28,
|
||||
[17] = 30,
|
||||
[18] = 28,
|
||||
[19] = 32,
|
||||
[20] = 0,
|
||||
[21] = 28,
|
||||
[22] = 30,
|
||||
[23] = 28,
|
||||
[24] = 34,
|
||||
[25] = 36,
|
||||
[26] = 37,
|
||||
[27] = 28,
|
||||
[28] = 0,
|
||||
[29] = 36,
|
||||
[30] = 37,
|
||||
[31] = 36,
|
||||
[32] = 26,
|
||||
[33] = 28,
|
||||
[34] = 30,
|
||||
[35] = 36,
|
||||
[36] = 32,
|
||||
[37] = 0,
|
||||
[38] = 28,
|
||||
[39] = 30,
|
||||
[40] = 36,
|
||||
[41] = 36,
|
||||
[42] = 34,
|
||||
[43] = 36,
|
||||
[44] = 37,
|
||||
[45] = 36,
|
||||
[46] = 36,
|
||||
[47] = 28,
|
||||
[48] = 28,
|
||||
[49] = 32,
|
||||
[50] = 0,
|
||||
[51] = 28,
|
||||
[52] = 30,
|
||||
[53] = 24,
|
||||
[54] = 24,
|
||||
[55] = 34,
|
||||
[56] = 36,
|
||||
[57] = 37,
|
||||
[58] = 24,
|
||||
[59] = 24,
|
||||
};
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/conflict_manager.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -8,7 +9,6 @@ START_TEST
|
|||
|
||||
describe("resolving parse conflicts", []() {
|
||||
bool should_update;
|
||||
ConflictManager *manager;
|
||||
|
||||
PreparedGrammar parse_grammar({
|
||||
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
|
||||
|
|
@ -21,19 +21,22 @@ describe("resolving parse conflicts", []() {
|
|||
{ "token3", keyword("stuff") },
|
||||
}, {});
|
||||
|
||||
before_each([&]() {
|
||||
manager = new ConflictManager(parse_grammar, lex_grammar);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
delete manager;
|
||||
});
|
||||
|
||||
describe("lexical conflicts", [&]() {
|
||||
Symbol sym1(0, SymbolOptionToken);
|
||||
Symbol sym2(1, SymbolOptionToken);
|
||||
Symbol sym3(2, SymbolOptionToken);
|
||||
|
||||
LexConflictManager *manager;
|
||||
|
||||
|
||||
before_each([&]() {
|
||||
manager = new LexConflictManager(lex_grammar);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
delete manager;
|
||||
});
|
||||
|
||||
it("favors non-errors over lexical errors", [&]() {
|
||||
should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
|
||||
AssertThat(should_update, IsTrue());
|
||||
|
|
@ -68,6 +71,15 @@ describe("resolving parse conflicts", []() {
|
|||
describe("syntactic conflicts", [&]() {
|
||||
Symbol sym1(0);
|
||||
Symbol sym2(1);
|
||||
ParseConflictManager *manager;
|
||||
|
||||
before_each([&]() {
|
||||
manager = new ParseConflictManager(parse_grammar, lex_grammar);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
delete manager;
|
||||
});
|
||||
|
||||
it("favors non-errors over parse errors", [&]() {
|
||||
should_update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
|
||||
|
|
|
|||
133
src/compiler/build_tables/build_lex_table.cc
Normal file
133
src/compiler/build_tables/build_lex_table.cc
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
|
||||
namespace build_tables {
|
||||
class LexTableBuilder {
|
||||
const PreparedGrammar lex_grammar;
|
||||
ParseTable *parse_table;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set, lex_grammar);
|
||||
for (auto transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(new_item_set);
|
||||
lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
|
||||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (auto &item : item_set)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (LexItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
auto current_action = lex_table.state(state_id).default_action;
|
||||
auto new_action = LexAction::Accept(item.lhs, item.precedence());
|
||||
if (conflict_manager.resolve_lex_action(current_action, new_action))
|
||||
lex_table.add_default_action(state_id, new_action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rules::rule_ptr after_separators(rules::rule_ptr rule) {
|
||||
return rules::Seq::Build({
|
||||
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
|
||||
make_shared<rules::Metadata>(make_shared<rules::Blank>(), map<rules::MetadataKey, int>({
|
||||
{rules::START_TOKEN, 1},
|
||||
})),
|
||||
rule
|
||||
});
|
||||
}
|
||||
|
||||
LexItemSet lex_item_set_for_parse_state(const ParseState &state) {
|
||||
LexItemSet result;
|
||||
for (auto &symbol : state.expected_inputs()) {
|
||||
if (symbol.is_token() && !symbol.is_built_in())
|
||||
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void assign_lex_state(ParseState *state) {
|
||||
state->lex_state_id = add_lex_state(lex_item_set_for_parse_state(*state));
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
auto pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
LexStateId state_id = lex_table.add_state();
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_token_start(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
void add_error_lex_state() {
|
||||
LexItemSet error_item_set;
|
||||
for (size_t i = 0; i < lex_grammar.rules.size(); i++) {
|
||||
LexItem item(Symbol(i, rules::SymbolOptionToken), after_separators(lex_grammar.rules[i].second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
for (size_t i = 0; i < lex_grammar.aux_rules.size(); i++) {
|
||||
LexItem item(Symbol(i, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)), after_separators(lex_grammar.aux_rules[i].second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
|
||||
add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
}
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
|
||||
lex_grammar(lex_grammar),
|
||||
parse_table(parse_table),
|
||||
conflict_manager(LexConflictManager(lex_grammar)) {}
|
||||
|
||||
LexTable build() {
|
||||
for (auto &parse_state : parse_table->states)
|
||||
assign_lex_state(&parse_state);
|
||||
add_error_lex_state();
|
||||
return lex_table;
|
||||
}
|
||||
};
|
||||
|
||||
// Builds the lex table for `lex_grammar` by running a LexTableBuilder over
// `parse_table`. The builder writes a lex state id onto each parse state.
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) {
    LexTableBuilder builder(parse_table, lex_grammar);
    return builder.build();
}
|
||||
}
|
||||
}
|
||||
16
src/compiler/build_tables/build_lex_table.h
Normal file
16
src/compiler/build_tables/build_lex_table.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
// Builds and returns the lex table for `lex_grammar`. `parse_table` is taken by
// pointer because the implementation stores a lex state id on each of its
// parse states.
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
105
src/compiler/build_tables/build_parse_table.cc
Normal file
105
src/compiler/build_tables/build_parse_table.cc
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
|
||||
namespace build_tables {
|
||||
class ParseTableBuilder {
|
||||
const PreparedGrammar grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
SymTransitions sym_transitions;
|
||||
ParseTable parse_table;
|
||||
|
||||
// Gathers the distinct precedence values of the items in `item_set` that have
// already consumed at least one symbol.
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
    set<int> precedences;
    for (const auto &entry : item_set) {
        // Items that have not consumed anything yet do not contribute.
        if (!(entry.consumed_symbol_count > 0))
            continue;
        precedences.insert(entry.precedence());
    }
    return precedences;
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &item_set = transition.second;
|
||||
set<int> precedence_values = precedence_values_for_item_set(item_set);
|
||||
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(symbol);
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
|
||||
ParseStateId new_state_id = add_parse_state(item_set);
|
||||
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (ParseItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
ParseAction action = (item.lhs == rules::START()) ?
|
||||
ParseAction::Accept() :
|
||||
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(item.lookahead_sym);
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
|
||||
parse_table.add_action(state_id, item.lookahead_sym, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
ParseStateId state_id = parse_table.add_state();
|
||||
parse_state_ids[item_set] = state_id;
|
||||
add_shift_actions(item_set, state_id);
|
||||
add_reduce_actions(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
|
||||
|
||||
pair<ParseTable, vector<Conflict>> build() {
|
||||
ParseItem start_item(rules::START(), make_shared<Symbol>(0), 0, rules::END_OF_INPUT());
|
||||
add_parse_state(item_set_closure(start_item, grammar));
|
||||
return { parse_table, conflict_manager.conflicts() };
|
||||
}
|
||||
};
|
||||
|
||||
// Public entry point: constructs a ParseTableBuilder for the two grammars and
// returns whatever its build() produces (the parse table plus conflicts).
pair<ParseTable, vector<Conflict>>
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
  ParseTableBuilder builder(grammar, lex_grammar);
  return builder.build();
}
|
||||
}
|
||||
}
|
||||
18
src/compiler/build_tables/build_parse_table.h
Normal file
18
src/compiler/build_tables/build_parse_table.h
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_
|
||||
#define COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
// Forward declaration: only referenced by const reference below.
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
// Builds the parse table for `grammar`. Returns the table paired with the
// conflicts reported while building it. `lex_grammar` is passed on to the
// parse conflict manager.
std::pair<ParseTable, std::vector<Conflict>>
|
||||
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_
|
||||
|
|
@ -1,211 +1,21 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/build_tables/conflict_manager.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/build_tables/build_lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::tuple;
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::unordered_map;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::CharacterSet;
|
||||
using std::make_tuple;
|
||||
|
||||
namespace build_tables {
|
||||
class TableBuilder {
|
||||
const PreparedGrammar grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
ConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
|
||||
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
|
||||
set<int> result;
|
||||
for (const auto &item : item_set)
|
||||
if (item.consumed_symbol_count > 0)
|
||||
result.insert(item.precedence());
|
||||
return result;
|
||||
}
|
||||
|
||||
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &item_set = transition.second;
|
||||
set<int> precedence_values = precedence_values_for_item_set(item_set);
|
||||
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(symbol);
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
|
||||
ParseStateId new_state_id = add_parse_state(item_set);
|
||||
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
auto transitions = char_transitions(item_set, grammar);
|
||||
for (auto transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet new_item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(new_item_set);
|
||||
lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
|
||||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (auto &item : item_set)
|
||||
if (item.is_token_start())
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (LexItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
auto current_action = lex_table.state(state_id).default_action;
|
||||
auto new_action = LexAction::Accept(item.lhs, item.precedence());
|
||||
if (conflict_manager.resolve_lex_action(current_action, new_action))
|
||||
lex_table.add_default_action(state_id, new_action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (ParseItem item : item_set) {
|
||||
if (item.is_done()) {
|
||||
ParseAction action = (item.lhs == rules::START()) ?
|
||||
ParseAction::Accept() :
|
||||
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(item.lookahead_sym);
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
|
||||
parse_table.add_action(state_id, item.lookahead_sym, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rules::rule_ptr after_separators(rules::rule_ptr rule) {
|
||||
return rules::Seq::Build({
|
||||
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
|
||||
make_shared<rules::Metadata>(make_shared<rules::Blank>(), map<rules::MetadataKey, int>({
|
||||
{rules::START_TOKEN, 1},
|
||||
})),
|
||||
rule
|
||||
});
|
||||
}
|
||||
|
||||
LexItemSet lex_item_set_for_parse_state(const ParseState &state) {
|
||||
LexItemSet result;
|
||||
for (auto &symbol : state.expected_inputs()) {
|
||||
if (symbol.is_token() && !symbol.is_built_in())
|
||||
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void assign_lex_state(ParseStateId state_id) {
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
state.lex_state_id = add_lex_state(lex_item_set_for_parse_state(state));
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
auto pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
LexStateId state_id = lex_table.add_state();
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_token_start(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
ParseStateId state_id = parse_table.add_state();
|
||||
parse_state_ids[item_set] = state_id;
|
||||
add_shift_actions(item_set, state_id);
|
||||
add_reduce_actions(item_set, state_id);
|
||||
assign_lex_state(state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
void add_error_lex_state() {
|
||||
LexItemSet error_item_set;
|
||||
for (size_t i = 0; i < lex_grammar.rules.size(); i++) {
|
||||
LexItem item(Symbol(i, rules::SymbolOptionToken), after_separators(lex_grammar.rules[i].second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
for (size_t i = 0; i < lex_grammar.aux_rules.size(); i++) {
|
||||
LexItem item(Symbol(i, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)), after_separators(lex_grammar.aux_rules[i].second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
|
||||
add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
}
|
||||
|
||||
public:
|
||||
TableBuilder(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
lex_grammar(lex_grammar),
|
||||
conflict_manager(ConflictManager(grammar, lex_grammar))
|
||||
{}
|
||||
|
||||
void build() {
|
||||
auto start_symbol = make_shared<Symbol>(0);
|
||||
ParseItem item(rules::START(), start_symbol, {}, rules::END_OF_INPUT());
|
||||
ParseItemSet item_set = item_set_closure(item, grammar);
|
||||
add_parse_state(item_set);
|
||||
add_error_lex_state();
|
||||
}
|
||||
|
||||
const vector<Conflict> conflicts() {
|
||||
return conflict_manager.conflicts();
|
||||
}
|
||||
|
||||
SymTransitions sym_transitions;
|
||||
ParseTable parse_table;
|
||||
LexTable lex_table;
|
||||
};
|
||||
|
||||
tuple<ParseTable, LexTable, vector<Conflict>>
|
||||
build_tables(const PreparedGrammar &grammar,
|
||||
const PreparedGrammar &lex_grammar) {
|
||||
TableBuilder builder(grammar, lex_grammar);
|
||||
builder.build();
|
||||
return make_tuple(builder.parse_table, builder.lex_table, builder.conflicts());
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
vector<Conflict> conflicts = parse_table_result.second;
|
||||
auto lex_table = build_lex_table(&parse_table, lex_grammar);
|
||||
return make_tuple(parse_table, lex_table, conflicts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lex_table.h"
|
||||
|
|
|
|||
37
src/compiler/build_tables/lex_conflict_manager.cc
Normal file
37
src/compiler/build_tables/lex_conflict_manager.cc
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
// Initialises the manager's `grammar` member from the given grammar.
LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) :
|
||||
grammar(grammar) {}
|
||||
|
||||
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action) {
|
||||
switch (old_action.type) {
|
||||
case LexActionTypeError:
|
||||
return true;
|
||||
case LexActionTypeAccept:
|
||||
if (new_action.precedence > old_action.precedence) {
|
||||
return true;
|
||||
} else if (new_action.precedence < old_action.precedence) {
|
||||
return false;
|
||||
} else {
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
21
src/compiler/build_tables/lex_conflict_manager.h
Normal file
21
src/compiler/build_tables/lex_conflict_manager.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
// Arbitrates between two lex actions that compete for the same slot.
class LexConflictManager {
|
||||
// The grammar passed to the constructor, held by value.
const PreparedGrammar grammar;
|
||||
|
||||
public:
|
||||
explicit LexConflictManager(const PreparedGrammar &grammar);
|
||||
// Returns true when `new_action` should replace `old_action`: always when
// the old action is an error; for an old accept action, when the new one has
// higher precedence, or equal precedence and a lower symbol index.
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
#include "compiler/build_tables/conflict_manager.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
|
@ -13,12 +13,12 @@ namespace tree_sitter {
|
|||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
ConflictManager::ConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar) :
|
||||
parse_grammar(parse_grammar),
|
||||
lex_grammar(lex_grammar) {}
|
||||
|
||||
bool ConflictManager::resolve_parse_action(const rules::Symbol &symbol,
|
||||
bool ParseConflictManager::resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action) {
|
||||
if (new_action.type < old_action.type)
|
||||
|
|
@ -70,25 +70,7 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
bool ConflictManager::resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action) {
|
||||
switch (old_action.type) {
|
||||
case LexActionTypeError:
|
||||
return true;
|
||||
case LexActionTypeAccept:
|
||||
if (new_action.precedence > old_action.precedence) {
|
||||
return true;
|
||||
} else if (new_action.precedence < old_action.precedence) {
|
||||
return false;
|
||||
} else {
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const vector<Conflict> ConflictManager::conflicts() const {
|
||||
const vector<Conflict> ParseConflictManager::conflicts() const {
|
||||
vector<Conflict> result;
|
||||
result.insert(result.end(), conflicts_.begin(), conflicts_.end());
|
||||
return result;
|
||||
|
|
@ -123,7 +105,7 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
void ConflictManager::record_conflict(const rules::Symbol &symbol,
|
||||
void ParseConflictManager::record_conflict(const rules::Symbol &symbol,
|
||||
const ParseAction &left,
|
||||
const ParseAction &right) {
|
||||
string name = symbol.is_token() ?
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
|
||||
#define COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
|
||||
#ifndef COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
|
||||
#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
|
@ -12,17 +12,14 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
class ConflictManager {
|
||||
class ParseConflictManager {
|
||||
const PreparedGrammar parse_grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
std::set<Conflict> conflicts_;
|
||||
|
||||
public:
|
||||
ConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
|
||||
bool resolve_lex_action(const LexAction &old_action,
|
||||
const LexAction &new_action);
|
||||
ParseConflictManager(const PreparedGrammar &parse_grammar,
|
||||
const PreparedGrammar &lex_grammar);
|
||||
bool resolve_parse_action(const rules::Symbol &symbol,
|
||||
const ParseAction &old_action,
|
||||
const ParseAction &new_action);
|
||||
|
|
@ -33,4 +30,4 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_CONFLICT_MANAGER_H_
|
||||
#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
|
||||
Loading…
Add table
Add a link
Reference in a new issue