Fix handling of tokens consisting of separator characters
The parser is no longer hard-coded to skip whitespace. Tokens such as newlines, whose characters overlap with the separator characters, can now be correctly recognized.
This commit is contained in:
parent
f39cb1890d
commit
1cc7e32e2d
32 changed files with 5401 additions and 4847 deletions
|
|
@ -112,7 +112,7 @@ namespace tree_sitter_examples {
|
|||
str("]") }) },
|
||||
|
||||
// Keywords
|
||||
{ "_terminator", choice({ str(";"), str("\n") }) },
|
||||
{ "_terminator", pattern("[;\n]") },
|
||||
{ "_var", str("var") },
|
||||
{ "_for", str("for") },
|
||||
{ "_if", str("if") },
|
||||
|
|
|
|||
|
|
@ -66,72 +66,113 @@ LEX_FN() {
|
|||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR();
|
||||
case 3:
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
case 4:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(4);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
LEX_ERROR();
|
||||
case 5:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
case 6:
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
case 7:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token6);
|
||||
case 9:
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '*')
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '/')
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
LEX_ERROR();
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token6);
|
||||
case 10:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token7);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(10);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 11:
|
||||
if (lookahead == '(')
|
||||
ACCEPT_TOKEN(ts_aux_sym_token7);
|
||||
case 12:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(12);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
if (lookahead == '(')
|
||||
ADVANCE(13);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(14);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 12:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 13:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(14);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 14:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(14);
|
||||
|
|
@ -141,123 +182,227 @@ LEX_FN() {
|
|||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(16);
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 16:
|
||||
if (('0' <= lookahead && lookahead <= '9') ||
|
||||
('A' <= lookahead && lookahead <= 'Z') ||
|
||||
(lookahead == '_') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(16);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 17:
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 17:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(17);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 18:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(18);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
LEX_ERROR();
|
||||
case 19:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(19);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 20:
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(20);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 21:
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(21);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
LEX_ERROR();
|
||||
case 22:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(22);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 23:
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(23);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
LEX_ERROR();
|
||||
case 24:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(24);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 25:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(25);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
LEX_ERROR();
|
||||
case 26:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(26);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 27:
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(27);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(12);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(7);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(4);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(8);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(13);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(14);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(15);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(10);
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(27);
|
||||
if (lookahead == '(')
|
||||
ADVANCE(13);
|
||||
if (lookahead == ')')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '*')
|
||||
ADVANCE(8);
|
||||
if (lookahead == '+')
|
||||
ADVANCE(5);
|
||||
if (lookahead == '-')
|
||||
ADVANCE(6);
|
||||
if (lookahead == '/')
|
||||
ADVANCE(9);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(14);
|
||||
if (('A' <= lookahead && lookahead <= 'Z') ||
|
||||
('a' <= lookahead && lookahead <= 'z'))
|
||||
ADVANCE(15);
|
||||
if (lookahead == '^')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_PANIC();
|
||||
|
|
@ -265,140 +410,140 @@ LEX_FN() {
|
|||
}
|
||||
|
||||
LEX_STATES = {
|
||||
[0] = 11,
|
||||
[1] = 24,
|
||||
[2] = 11,
|
||||
[0] = 12,
|
||||
[1] = 23,
|
||||
[2] = 12,
|
||||
[3] = 0,
|
||||
[4] = 22,
|
||||
[5] = 11,
|
||||
[4] = 21,
|
||||
[5] = 12,
|
||||
[6] = 0,
|
||||
[7] = 21,
|
||||
[8] = 11,
|
||||
[7] = 20,
|
||||
[8] = 12,
|
||||
[9] = 0,
|
||||
[10] = 0,
|
||||
[11] = 11,
|
||||
[12] = 3,
|
||||
[13] = 11,
|
||||
[14] = 1,
|
||||
[15] = 19,
|
||||
[16] = 11,
|
||||
[17] = 1,
|
||||
[18] = 18,
|
||||
[19] = 11,
|
||||
[20] = 1,
|
||||
[21] = 1,
|
||||
[22] = 11,
|
||||
[23] = 6,
|
||||
[24] = 11,
|
||||
[25] = 3,
|
||||
[26] = 17,
|
||||
[27] = 11,
|
||||
[28] = 3,
|
||||
[29] = 3,
|
||||
[30] = 11,
|
||||
[31] = 9,
|
||||
[32] = 11,
|
||||
[33] = 6,
|
||||
[34] = 6,
|
||||
[35] = 11,
|
||||
[36] = 1,
|
||||
[37] = 6,
|
||||
[38] = 1,
|
||||
[39] = 6,
|
||||
[40] = 9,
|
||||
[41] = 3,
|
||||
[42] = 11,
|
||||
[43] = 1,
|
||||
[44] = 9,
|
||||
[45] = 1,
|
||||
[46] = 3,
|
||||
[47] = 3,
|
||||
[48] = 17,
|
||||
[49] = 11,
|
||||
[50] = 1,
|
||||
[51] = 17,
|
||||
[52] = 11,
|
||||
[53] = 3,
|
||||
[54] = 1,
|
||||
[55] = 1,
|
||||
[56] = 1,
|
||||
[57] = 18,
|
||||
[58] = 11,
|
||||
[59] = 1,
|
||||
[60] = 18,
|
||||
[61] = 11,
|
||||
[62] = 1,
|
||||
[63] = 20,
|
||||
[64] = 11,
|
||||
[65] = 19,
|
||||
[66] = 19,
|
||||
[67] = 11,
|
||||
[68] = 1,
|
||||
[69] = 19,
|
||||
[70] = 19,
|
||||
[71] = 20,
|
||||
[72] = 1,
|
||||
[73] = 11,
|
||||
[74] = 1,
|
||||
[75] = 20,
|
||||
[76] = 11,
|
||||
[77] = 1,
|
||||
[78] = 1,
|
||||
[11] = 12,
|
||||
[12] = 4,
|
||||
[13] = 12,
|
||||
[14] = 2,
|
||||
[15] = 18,
|
||||
[16] = 12,
|
||||
[17] = 2,
|
||||
[18] = 17,
|
||||
[19] = 12,
|
||||
[20] = 2,
|
||||
[21] = 2,
|
||||
[22] = 12,
|
||||
[23] = 7,
|
||||
[24] = 12,
|
||||
[25] = 4,
|
||||
[26] = 16,
|
||||
[27] = 12,
|
||||
[28] = 4,
|
||||
[29] = 4,
|
||||
[30] = 12,
|
||||
[31] = 10,
|
||||
[32] = 12,
|
||||
[33] = 7,
|
||||
[34] = 7,
|
||||
[35] = 12,
|
||||
[36] = 2,
|
||||
[37] = 7,
|
||||
[38] = 2,
|
||||
[39] = 7,
|
||||
[40] = 10,
|
||||
[41] = 4,
|
||||
[42] = 12,
|
||||
[43] = 2,
|
||||
[44] = 10,
|
||||
[45] = 2,
|
||||
[46] = 4,
|
||||
[47] = 4,
|
||||
[48] = 16,
|
||||
[49] = 12,
|
||||
[50] = 2,
|
||||
[51] = 16,
|
||||
[52] = 12,
|
||||
[53] = 4,
|
||||
[54] = 2,
|
||||
[55] = 2,
|
||||
[56] = 2,
|
||||
[57] = 17,
|
||||
[58] = 12,
|
||||
[59] = 2,
|
||||
[60] = 17,
|
||||
[61] = 12,
|
||||
[62] = 2,
|
||||
[63] = 19,
|
||||
[64] = 12,
|
||||
[65] = 18,
|
||||
[66] = 18,
|
||||
[67] = 12,
|
||||
[68] = 2,
|
||||
[69] = 18,
|
||||
[70] = 18,
|
||||
[71] = 19,
|
||||
[72] = 2,
|
||||
[73] = 12,
|
||||
[74] = 2,
|
||||
[75] = 19,
|
||||
[76] = 12,
|
||||
[77] = 2,
|
||||
[78] = 2,
|
||||
[79] = 0,
|
||||
[80] = 0,
|
||||
[81] = 21,
|
||||
[82] = 11,
|
||||
[83] = 1,
|
||||
[84] = 21,
|
||||
[85] = 11,
|
||||
[81] = 20,
|
||||
[82] = 12,
|
||||
[83] = 2,
|
||||
[84] = 20,
|
||||
[85] = 12,
|
||||
[86] = 0,
|
||||
[87] = 23,
|
||||
[88] = 11,
|
||||
[89] = 22,
|
||||
[90] = 22,
|
||||
[91] = 11,
|
||||
[92] = 1,
|
||||
[93] = 22,
|
||||
[94] = 22,
|
||||
[95] = 23,
|
||||
[87] = 22,
|
||||
[88] = 12,
|
||||
[89] = 21,
|
||||
[90] = 21,
|
||||
[91] = 12,
|
||||
[92] = 2,
|
||||
[93] = 21,
|
||||
[94] = 21,
|
||||
[95] = 22,
|
||||
[96] = 0,
|
||||
[97] = 11,
|
||||
[98] = 1,
|
||||
[99] = 23,
|
||||
[100] = 11,
|
||||
[97] = 12,
|
||||
[98] = 2,
|
||||
[99] = 22,
|
||||
[100] = 12,
|
||||
[101] = 0,
|
||||
[102] = 26,
|
||||
[103] = 11,
|
||||
[104] = 24,
|
||||
[105] = 25,
|
||||
[106] = 11,
|
||||
[107] = 24,
|
||||
[108] = 24,
|
||||
[109] = 11,
|
||||
[110] = 1,
|
||||
[111] = 24,
|
||||
[112] = 24,
|
||||
[113] = 25,
|
||||
[114] = 11,
|
||||
[115] = 1,
|
||||
[116] = 25,
|
||||
[117] = 11,
|
||||
[118] = 24,
|
||||
[119] = 27,
|
||||
[120] = 11,
|
||||
[121] = 26,
|
||||
[122] = 26,
|
||||
[123] = 11,
|
||||
[124] = 1,
|
||||
[125] = 26,
|
||||
[102] = 25,
|
||||
[103] = 12,
|
||||
[104] = 23,
|
||||
[105] = 24,
|
||||
[106] = 12,
|
||||
[107] = 23,
|
||||
[108] = 23,
|
||||
[109] = 12,
|
||||
[110] = 2,
|
||||
[111] = 23,
|
||||
[112] = 23,
|
||||
[113] = 24,
|
||||
[114] = 12,
|
||||
[115] = 2,
|
||||
[116] = 24,
|
||||
[117] = 12,
|
||||
[118] = 23,
|
||||
[119] = 26,
|
||||
[120] = 12,
|
||||
[121] = 25,
|
||||
[122] = 25,
|
||||
[123] = 12,
|
||||
[124] = 2,
|
||||
[125] = 25,
|
||||
[126] = 0,
|
||||
[127] = 26,
|
||||
[127] = 25,
|
||||
[128] = 0,
|
||||
[129] = 27,
|
||||
[130] = 24,
|
||||
[131] = 11,
|
||||
[132] = 1,
|
||||
[133] = 27,
|
||||
[129] = 26,
|
||||
[130] = 23,
|
||||
[131] = 12,
|
||||
[132] = 2,
|
||||
[133] = 26,
|
||||
};
|
||||
|
||||
PARSE_TABLE = {
|
||||
|
|
@ -420,7 +565,7 @@ PARSE_TABLE = {
|
|||
[1] = {
|
||||
[ts_aux_sym_token3] = SHIFT(2),
|
||||
[ts_aux_sym_token4] = SHIFT(100),
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_difference, 1),
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_sum, 1),
|
||||
},
|
||||
[2] = {
|
||||
[ts_sym__operand1] = SHIFT(3),
|
||||
|
|
@ -440,7 +585,7 @@ PARSE_TABLE = {
|
|||
[4] = {
|
||||
[ts_aux_sym_token5] = SHIFT(5),
|
||||
[ts_aux_sym_token6] = SHIFT(85),
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_product, 1),
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 1),
|
||||
},
|
||||
[5] = {
|
||||
[ts_sym__operand2] = SHIFT(6),
|
||||
|
|
@ -488,7 +633,7 @@ PARSE_TABLE = {
|
|||
[ts_builtin_sym_error] = SHIFT(78),
|
||||
},
|
||||
[12] = {
|
||||
[ts_aux_sym_token2] = REDUCE(ts_sym_difference, 1),
|
||||
[ts_aux_sym_token2] = REDUCE(ts_sym_sum, 1),
|
||||
[ts_aux_sym_token3] = SHIFT(13),
|
||||
[ts_aux_sym_token4] = SHIFT(76),
|
||||
},
|
||||
|
|
@ -559,8 +704,8 @@ PARSE_TABLE = {
|
|||
},
|
||||
[23] = {
|
||||
[ts_aux_sym_token2] = REDUCE(ts_sym_quotient, 1),
|
||||
[ts_aux_sym_token3] = REDUCE(ts_sym_quotient, 1),
|
||||
[ts_aux_sym_token4] = REDUCE(ts_sym_product, 1),
|
||||
[ts_aux_sym_token3] = REDUCE(ts_sym_product, 1),
|
||||
[ts_aux_sym_token4] = REDUCE(ts_sym_quotient, 1),
|
||||
[ts_aux_sym_token5] = SHIFT(24),
|
||||
[ts_aux_sym_token6] = SHIFT(52),
|
||||
},
|
||||
|
|
@ -1080,7 +1225,7 @@ PARSE_TABLE = {
|
|||
},
|
||||
[102] = {
|
||||
[ts_aux_sym_token3] = REDUCE(ts_sym_quotient, 1),
|
||||
[ts_aux_sym_token4] = REDUCE(ts_sym_quotient, 1),
|
||||
[ts_aux_sym_token4] = REDUCE(ts_sym_product, 1),
|
||||
[ts_aux_sym_token5] = SHIFT(103),
|
||||
[ts_aux_sym_token6] = SHIFT(117),
|
||||
[ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 1),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -58,68 +58,100 @@ LEX_FN() {
|
|||
START_LEXER();
|
||||
switch (lex_state) {
|
||||
case 0:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if ((lookahead == '\t') ||
|
||||
(lookahead == '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(0);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
if (lookahead == ',')
|
||||
ADVANCE(2);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_builtin_sym_end);
|
||||
case 2:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
case 4:
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR();
|
||||
case 5:
|
||||
if (lookahead == ',')
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(2);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(6);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(3);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token3);
|
||||
case 4:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token4);
|
||||
case 5:
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(5);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 6:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token6);
|
||||
case 7:
|
||||
if (lookahead == ']')
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(6);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(3);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
LEX_ERROR();
|
||||
case 7:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token6);
|
||||
case 8:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(9);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(25);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(30);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(34);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(38);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(8);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
LEX_ERROR();
|
||||
case 9:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(9);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(16);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
LEX_ERROR();
|
||||
case 10:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 11:
|
||||
if (!((lookahead == '\"') ||
|
||||
|
|
@ -129,8 +161,6 @@ LEX_FN() {
|
|||
ADVANCE(12);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 12:
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
|
|
@ -140,12 +170,8 @@ LEX_FN() {
|
|||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(14);
|
||||
if ('#' <= lookahead && lookahead <= '\"')
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 14:
|
||||
if (!((lookahead == '\"') ||
|
||||
|
|
@ -155,69 +181,47 @@ LEX_FN() {
|
|||
ADVANCE(12);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 15:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 16:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(17);
|
||||
if ('#' <= lookahead && lookahead <= '\"')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
if (lookahead == '.')
|
||||
ADVANCE(16);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 16:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(17);
|
||||
LEX_ERROR();
|
||||
case 17:
|
||||
if (!((lookahead == '\"') ||
|
||||
(lookahead == '\\')))
|
||||
ADVANCE(11);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(12);
|
||||
if (lookahead == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= lookahead && lookahead <= '\\')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 18:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case 19:
|
||||
if (lookahead == '.')
|
||||
ADVANCE(20);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(23);
|
||||
ADVANCE(17);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 18:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
case 19:
|
||||
if (lookahead == 'a')
|
||||
ADVANCE(20);
|
||||
LEX_ERROR();
|
||||
case 20:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(21);
|
||||
LEX_ERROR();
|
||||
case 21:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
if (lookahead == 's')
|
||||
ADVANCE(22);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
LEX_ERROR();
|
||||
case 22:
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(22);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 23:
|
||||
if (lookahead == '.')
|
||||
ADVANCE(20);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(23);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
LEX_ERROR();
|
||||
case 23:
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
case 24:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token5);
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(25);
|
||||
LEX_ERROR();
|
||||
case 25:
|
||||
if (lookahead == 'a')
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(26);
|
||||
LEX_ERROR();
|
||||
case 26:
|
||||
|
|
@ -225,102 +229,145 @@ LEX_FN() {
|
|||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 27:
|
||||
if (lookahead == 's')
|
||||
ADVANCE(28);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
case 28:
|
||||
if (lookahead == 'e')
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(29);
|
||||
LEX_ERROR();
|
||||
case 29:
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
case 30:
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(30);
|
||||
LEX_ERROR();
|
||||
case 30:
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(31);
|
||||
LEX_ERROR();
|
||||
case 31:
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(32);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
case 32:
|
||||
if (lookahead == 'l')
|
||||
ADVANCE(33);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 33:
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
case 34:
|
||||
if (lookahead == 'r')
|
||||
ADVANCE(35);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(33);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(34);
|
||||
LEX_ERROR();
|
||||
case 34:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
case 35:
|
||||
if (lookahead == 'u')
|
||||
ADVANCE(36);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(35);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case 36:
|
||||
if (lookahead == 'e')
|
||||
ADVANCE(37);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(36);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
LEX_ERROR();
|
||||
case 37:
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(37);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case 38:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 39:
|
||||
if (lookahead == ':')
|
||||
ADVANCE(40);
|
||||
LEX_ERROR();
|
||||
case 40:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token2);
|
||||
case 41:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(9);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR();
|
||||
case 42:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(9);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(24);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(6);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(25);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(30);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(34);
|
||||
if (lookahead == '{')
|
||||
START_TOKEN();
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(38);
|
||||
LEX_ERROR();
|
||||
case 43:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(9);
|
||||
ADVANCE(10);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(3);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(34);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
case ts_lex_state_error:
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(9);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(2);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(19);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(40);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(24);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(6);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(25);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(30);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(34);
|
||||
if (lookahead == '{')
|
||||
if (lookahead == '\0')
|
||||
ADVANCE(1);
|
||||
if (('\t' <= lookahead && lookahead <= '\n') ||
|
||||
(lookahead == '\r') ||
|
||||
(lookahead == ' '))
|
||||
ADVANCE(38);
|
||||
if (lookahead == '}')
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(10);
|
||||
if (lookahead == ',')
|
||||
ADVANCE(3);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(15);
|
||||
if (lookahead == ':')
|
||||
ADVANCE(34);
|
||||
if (lookahead == '[')
|
||||
ADVANCE(18);
|
||||
if (lookahead == ']')
|
||||
ADVANCE(7);
|
||||
if (lookahead == 'f')
|
||||
ADVANCE(19);
|
||||
if (lookahead == 'n')
|
||||
ADVANCE(24);
|
||||
if (lookahead == 't')
|
||||
ADVANCE(28);
|
||||
if (lookahead == '{')
|
||||
ADVANCE(32);
|
||||
if (lookahead == '}')
|
||||
ADVANCE(4);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_PANIC();
|
||||
|
|
@ -328,64 +375,64 @@ LEX_FN() {
|
|||
}
|
||||
|
||||
LEX_STATES = {
|
||||
[0] = 8,
|
||||
[0] = 9,
|
||||
[1] = 0,
|
||||
[2] = 0,
|
||||
[3] = 41,
|
||||
[4] = 39,
|
||||
[5] = 8,
|
||||
[6] = 1,
|
||||
[7] = 1,
|
||||
[8] = 4,
|
||||
[3] = 35,
|
||||
[4] = 33,
|
||||
[5] = 9,
|
||||
[6] = 2,
|
||||
[7] = 2,
|
||||
[8] = 5,
|
||||
[9] = 0,
|
||||
[10] = 43,
|
||||
[11] = 39,
|
||||
[12] = 8,
|
||||
[13] = 1,
|
||||
[14] = 4,
|
||||
[15] = 41,
|
||||
[16] = 39,
|
||||
[17] = 8,
|
||||
[18] = 1,
|
||||
[19] = 4,
|
||||
[20] = 1,
|
||||
[21] = 42,
|
||||
[22] = 5,
|
||||
[23] = 5,
|
||||
[24] = 7,
|
||||
[25] = 1,
|
||||
[26] = 8,
|
||||
[27] = 5,
|
||||
[28] = 7,
|
||||
[29] = 41,
|
||||
[30] = 39,
|
||||
[31] = 8,
|
||||
[32] = 1,
|
||||
[33] = 4,
|
||||
[34] = 5,
|
||||
[35] = 5,
|
||||
[36] = 1,
|
||||
[37] = 4,
|
||||
[38] = 5,
|
||||
[39] = 42,
|
||||
[40] = 5,
|
||||
[41] = 7,
|
||||
[42] = 5,
|
||||
[43] = 5,
|
||||
[44] = 1,
|
||||
[45] = 1,
|
||||
[46] = 1,
|
||||
[47] = 4,
|
||||
[48] = 1,
|
||||
[49] = 1,
|
||||
[50] = 4,
|
||||
[10] = 37,
|
||||
[11] = 33,
|
||||
[12] = 9,
|
||||
[13] = 2,
|
||||
[14] = 5,
|
||||
[15] = 35,
|
||||
[16] = 33,
|
||||
[17] = 9,
|
||||
[18] = 2,
|
||||
[19] = 5,
|
||||
[20] = 2,
|
||||
[21] = 36,
|
||||
[22] = 6,
|
||||
[23] = 6,
|
||||
[24] = 8,
|
||||
[25] = 2,
|
||||
[26] = 9,
|
||||
[27] = 6,
|
||||
[28] = 8,
|
||||
[29] = 35,
|
||||
[30] = 33,
|
||||
[31] = 9,
|
||||
[32] = 2,
|
||||
[33] = 5,
|
||||
[34] = 6,
|
||||
[35] = 6,
|
||||
[36] = 2,
|
||||
[37] = 5,
|
||||
[38] = 6,
|
||||
[39] = 36,
|
||||
[40] = 6,
|
||||
[41] = 8,
|
||||
[42] = 6,
|
||||
[43] = 6,
|
||||
[44] = 2,
|
||||
[45] = 2,
|
||||
[46] = 2,
|
||||
[47] = 5,
|
||||
[48] = 2,
|
||||
[49] = 2,
|
||||
[50] = 5,
|
||||
[51] = 0,
|
||||
[52] = 1,
|
||||
[53] = 4,
|
||||
[52] = 2,
|
||||
[53] = 5,
|
||||
[54] = 0,
|
||||
[55] = 42,
|
||||
[56] = 5,
|
||||
[57] = 7,
|
||||
[55] = 36,
|
||||
[56] = 6,
|
||||
[57] = 8,
|
||||
[58] = 0,
|
||||
[59] = 0,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -51,10 +51,11 @@ static ts_tree * ts_lex(ts_lexer *lexer, state_id lex_state)
|
|||
|
||||
#define START_LEXER() \
|
||||
char lookahead; \
|
||||
ts_lexer_skip_whitespace(lexer); \
|
||||
if (!ts_lexer_lookahead_char(lexer)) return ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \
|
||||
next_state: \
|
||||
lookahead = ts_lexer_lookahead_char(lexer);
|
||||
|
||||
#define START_TOKEN() \
|
||||
ts_lexer_start_token(lexer);
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ ts_lexer_advance(lexer); lex_state = state_index; goto next_state; }
|
||||
|
|
@ -73,7 +74,7 @@ static const ts_parse_action ts_parse_actions[ts_state_count][ts_symbol_count]
|
|||
|
||||
#define EXPORT_PARSER(constructor_name) \
|
||||
ts_parser constructor_name() { \
|
||||
return (ts_parser){ \
|
||||
return (ts_parser) { \
|
||||
.parse_fn = ts_parse, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.data = ts_lr_parser_make(ts_symbol_count, (const ts_parse_action *)ts_parse_actions, ts_lex_states, hidden_symbol_flags), \
|
||||
|
|
@ -161,6 +162,10 @@ static void ts_lexer_advance(ts_lexer *lexer) {
|
|||
}
|
||||
}
|
||||
|
||||
static void ts_lexer_start_token(ts_lexer *lexer) {
|
||||
lexer->token_start_position = ts_lexer_position(lexer);
|
||||
}
|
||||
|
||||
static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
|
||||
size_t current_position = ts_lexer_position(lexer);
|
||||
size_t size = current_position - lexer->token_start_position;
|
||||
|
|
@ -169,12 +174,6 @@ static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
|
|||
return ts_tree_make_leaf(symbol, size, offset);
|
||||
}
|
||||
|
||||
static void ts_lexer_skip_whitespace(ts_lexer *lexer) {
|
||||
while (isspace(ts_lexer_lookahead_char(lexer)))
|
||||
ts_lexer_advance(lexer);
|
||||
lexer->token_start_position = ts_lexer_position(lexer);
|
||||
}
|
||||
|
||||
static const state_id ts_lex_state_error = -1;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/build_tables.h"
|
||||
#include <functional>
|
||||
|
||||
using namespace rules;
|
||||
using build_tables::build_tables;
|
||||
|
||||
static set<Symbol> keys(const map<Symbol, ParseAction> &map) {
|
||||
set<Symbol> result;
|
||||
for (auto pair : map) {
|
||||
result.insert(pair.first);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("building parse and lex tables", []() {
|
||||
PreparedGrammar grammar({
|
||||
{ "expression", choice({
|
||||
seq({
|
||||
sym("term"),
|
||||
sym("plus"),
|
||||
sym("term") }),
|
||||
sym("term") }) },
|
||||
{ "term", choice({
|
||||
sym("variable"),
|
||||
sym("number"),
|
||||
seq({
|
||||
sym("left-paren"),
|
||||
sym("expression"),
|
||||
sym("right-paren")
|
||||
}) }) }
|
||||
}, {});
|
||||
|
||||
PreparedGrammar lex_grammar({
|
||||
{ "plus", str("+") },
|
||||
{ "variable", pattern("\\a+") },
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "left-paren", str("(") },
|
||||
{ "right-paren", str(")") }
|
||||
}, {});
|
||||
|
||||
ParseTable table;
|
||||
LexTable lex_table;
|
||||
|
||||
before_each([&]() {
|
||||
pair<ParseTable, LexTable> tables = build_tables::build_tables(grammar, lex_grammar);
|
||||
table = tables.first;
|
||||
lex_table = tables.second;
|
||||
});
|
||||
|
||||
function<ParseState(size_t)> parse_state = [&](size_t index) {
|
||||
return table.states[index];
|
||||
};
|
||||
|
||||
function<LexState(size_t)> lex_state = [&](size_t parse_state_index) {
|
||||
long index = table.states[parse_state_index].lex_state_id;
|
||||
return lex_table.states[index];
|
||||
};
|
||||
|
||||
it("has the right starting state", [&]() {
|
||||
AssertThat(keys(parse_state(0).actions), Equals(set<Symbol>({
|
||||
Symbol("expression"),
|
||||
Symbol("term"),
|
||||
Symbol("number"),
|
||||
Symbol("variable"),
|
||||
Symbol("left-paren"),
|
||||
})));
|
||||
|
||||
AssertThat(lex_state(0).expected_inputs(), Equals(set<CharacterSet>({
|
||||
CharacterSet({ '(' }),
|
||||
CharacterSet({ {'0', '9'} }),
|
||||
CharacterSet({ {'a', 'z'}, {'A', 'Z'} }),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
60
spec/compiler/build_tables/check_metadata_spec.cc
Normal file
60
spec/compiler/build_tables/check_metadata_spec.cc
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/build_tables/check_metadata.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for build_tables::check_metadata. Each case builds a rule and asserts
// whether check_metadata reports the flag held in `value` for that rule.
describe("checking if rules have metadata", []() {
    // The flag being searched for; the "compatible" rules below carry this
    // bit together with an additional low bit.
    MetadataValue value = MetadataValue(1 << 3);

    it("returns true for a compatible metadata rule", [&]() {
        auto rule = make_shared<Metadata>(sym("x"), MetadataValue(value | 1));
        AssertThat(check_metadata(rule, value), IsTrue());
    });

    it("returns false for an incompatible metadata rule", [&]() {
        auto rule = make_shared<Metadata>(sym("x"), MetadataValue(1 << 2));
        AssertThat(check_metadata(rule, value), IsFalse());
    });

    it("returns false for a non-metadata rule", [&]() {
        auto rule = sym("x");
        AssertThat(check_metadata(rule, value), IsFalse());
    });

    it("returns true for a compatible metadata rule preceded by rules that can be blank", [&]() {
        auto rule = seq({
            repeat(sym("x")),
            make_shared<Metadata>(sym("x"), MetadataValue(value | 1)),
        });

        AssertThat(check_metadata(rule, value), IsTrue());
    });

    it("returns true for a choice including a compatible metadata rule", [&]() {
        auto rule = choice({
            sym("x"),
            make_shared<Metadata>(sym("x"), MetadataValue(value | 1)),
        });

        AssertThat(check_metadata(rule, value), IsTrue());
    });

    it("returns true for a repetition containing a compatible metadata rule", [&]() {
        auto rule = repeat(make_shared<Metadata>(sym("x"), MetadataValue(value | 1)));
        AssertThat(check_metadata(rule, value), IsTrue());
    });

    // The title previously said "returns true" although the expectation is
    // IsFalse(): the metadata sits behind a leading symbol in the sequence.
    it("returns false for a metadata rule preceded by rules that cannot be blank", [&]() {
        auto rule = seq({
            sym("x"),
            make_shared<Metadata>(sym("x"), MetadataValue(value | 1)),
        });
        AssertThat(check_metadata(rule, value), IsFalse());
    });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
using std::set;
|
||||
using namespace build_tables;
|
||||
|
|
@ -83,6 +84,14 @@ describe("computing FIRST sets", []() {
|
|||
})));
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores metadata rules", [&]() {
|
||||
auto rule = make_shared<Metadata>(sym("x"), MetadataValue(1));
|
||||
|
||||
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol("x"),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
37
spec/compiler/build_tables/merge_transitions_spec.cc
Normal file
37
spec/compiler/build_tables/merge_transitions_spec.cc
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/merge_transitions.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
// Specs for build_tables::merge_char_transitions on maps keyed by CharacterSet.
describe("merging character set transitions", []() {
    using int_map = map<CharacterSet, int>;

    // Merge function handed to merge_char_transitions: adds the two values.
    // The fixtures use 1, 2 and 4, so each sum identifies which inputs met.
    auto add_values = [](int lhs, int rhs) -> int {
        return lhs + rhs;
    };

    describe("when two of the right transitions intersect one of the left transitions", [&]() {
        it("splits the left-hand transition correctly", [&]() {
            // One transition covering both 'a' and 'c'.
            int_map combined({
                { CharacterSet({ 'a', 'c' }), 1 },
            });

            // Two transitions that each overlap part of `combined`.
            int_map separate({
                { CharacterSet({ 'a' }), 2 },
                { CharacterSet({ 'c' }), 4 },
            });

            auto merged = merge_char_transitions<int>(combined, separate, add_values);
            AssertThat(merged, Equals(int_map({
                { CharacterSet({ 'a' }), 3 },
                { CharacterSet({ 'c' }), 5 },
            })));

            // Swapping the operands must give the same result.
            auto merged_swapped = merge_char_transitions<int>(separate, combined, add_values);
            AssertThat(merged_swapped, Equals(merged));
        });
    });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
#include "compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
|
|
@ -45,6 +46,14 @@ describe("checking if rules can be blank", [&]() {
|
|||
rule = seq({ blank(), choice({ sym("x"), blank() }) });
|
||||
AssertThat(rule_can_be_blank(rule), IsTrue());
|
||||
});
|
||||
|
||||
it("ignores metadata rules", [&]() {
|
||||
rule = make_shared<rules::Metadata>(blank(), rules::MetadataValue(0));
|
||||
AssertThat(rule_can_be_blank(rule), IsTrue());
|
||||
|
||||
rule = make_shared<rules::Metadata>(sym("one"), rules::MetadataValue(0));
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
});
|
||||
|
||||
describe("checking recursively (by expanding non-terminals)", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
|
|
|
|||
|
|
@ -171,10 +171,7 @@ describe("rule transitions", []() {
|
|||
CharacterSet({ 'a' }),
|
||||
seq({
|
||||
character({ 'b' }),
|
||||
choice({
|
||||
rule,
|
||||
blank()
|
||||
})
|
||||
rule,
|
||||
})
|
||||
}})));
|
||||
|
||||
|
|
@ -182,13 +179,8 @@ describe("rule transitions", []() {
|
|||
AssertThat(
|
||||
char_transitions(rule),
|
||||
Equals(rule_map<CharacterSet>({
|
||||
{
|
||||
CharacterSet({ 'a' }),
|
||||
choice({
|
||||
rule,
|
||||
blank()
|
||||
})
|
||||
}})));
|
||||
{ CharacterSet({ 'a' }), rule }
|
||||
})));
|
||||
});
|
||||
|
||||
describe("regression tests (somewhat redundant, should maybe be deleted later)", []() {
|
||||
|
|
@ -203,10 +195,7 @@ describe("rule transitions", []() {
|
|||
|
||||
AssertThat(char_transitions(rule), Equals(rule_map<CharacterSet>({
|
||||
{ CharacterSet({ '"' }).complement(), seq({
|
||||
choice({
|
||||
repeat(character({ '"' }, false)),
|
||||
blank(),
|
||||
}),
|
||||
repeat(character({ '"' }, false)),
|
||||
character({ '"' }), }) },
|
||||
{ CharacterSet({ '"' }), blank() },
|
||||
})));
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
==========================================
|
||||
parses multiple statements
|
||||
==========================================
|
||||
var x = {};
|
||||
var x = {}
|
||||
firstFunction(x);
|
||||
secondFunction(x);
|
||||
---
|
||||
|
|
|
|||
|
|
@ -4,11 +4,16 @@
|
|||
#include <unordered_map>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/build_tables/first_set.h"
|
||||
|
||||
#include "stream_methods.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
using std::string;
|
||||
|
|
@ -48,13 +53,20 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (auto transition : char_transitions(item_set, grammar)) {
|
||||
auto transitions = char_transitions(item_set, grammar);
|
||||
for (auto transition : transitions) {
|
||||
CharacterSet rule = transition.first;
|
||||
LexItemSet item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(item_set);
|
||||
LexItemSet new_item_set = transition.second;
|
||||
LexStateId new_state_id = add_lex_state(new_item_set);
|
||||
lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
|
||||
}
|
||||
}
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (auto &item : item_set)
|
||||
if (item.has_metadata(rules::START_TOKEN))
|
||||
lex_table.state(state_id).is_token_start = true;
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (LexItem item : item_set) {
|
||||
|
|
@ -80,23 +92,35 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the rule used to lex a token: a Repeat over the separator characters
// (space, tab, newline, carriage return) followed by `rule` wrapped in
// START_TOKEN metadata.
rules::rule_ptr after_separators(rules::rule_ptr rule) {
    rules::rule_ptr separators =
        make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy());
    rules::rule_ptr token = make_shared<rules::Metadata>(rule, rules::START_TOKEN);
    return rules::Seq::Build({ separators, token });
}
|
||||
|
||||
// Collects the lex items for a parse state: one item per expected input that
// the lex grammar defines, plus an END_OF_INPUT item matching character 0.
// Every rule is passed through after_separators first.
LexItemSet lex_item_set_for_parse_state(const ParseState &state) {
    LexItemSet items;

    for (const auto &symbol : state.expected_inputs()) {
        // Symbols without a lexical definition contribute no lex item.
        if (!lex_grammar.has_definition(symbol))
            continue;
        auto token_rule = after_separators(lex_grammar.rule(symbol));
        items.insert(LexItem(symbol, token_rule));
    }

    auto end_rule = after_separators(CharacterSet({ 0 }).copy());
    items.insert(LexItem(rules::END_OF_INPUT(), end_rule));
    return items;
}
|
||||
|
||||
void assign_lex_state(ParseStateId state_id) {
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
LexItemSet item_set;
|
||||
for (auto &symbol : state.expected_inputs()) {
|
||||
if (lex_grammar.has_definition(symbol))
|
||||
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
|
||||
}
|
||||
|
||||
state.lex_state_id = add_lex_state(item_set);
|
||||
state.lex_state_id = add_lex_state(lex_item_set_for_parse_state(state));
|
||||
}
|
||||
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
auto state_id = lex_state_id_for_item_set(item_set);
|
||||
if (state_id == NOT_FOUND) {
|
||||
state_id = lex_table.add_state();
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_token_start(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
}
|
||||
|
|
@ -119,13 +143,14 @@ namespace tree_sitter {
|
|||
void add_error_lex_state() {
|
||||
LexItemSet error_item_set;
|
||||
for (auto &pair : lex_grammar.rules) {
|
||||
LexItem item(Symbol(pair.first, rules::SymbolTypeNormal), pair.second);
|
||||
LexItem item(Symbol(pair.first, rules::SymbolTypeNormal), after_separators(pair.second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
for (auto &pair : lex_grammar.aux_rules) {
|
||||
LexItem item(Symbol(pair.first, rules::SymbolTypeAuxiliary), pair.second);
|
||||
LexItem item(Symbol(pair.first, rules::SymbolTypeAuxiliary), after_separators(pair.second));
|
||||
error_item_set.insert(item);
|
||||
}
|
||||
error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
|
||||
add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
}
|
||||
|
|
|
|||
39
src/compiler/build_tables/check_metadata.cc
Normal file
39
src/compiler/build_tables/check_metadata.cc
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#include "check_metadata.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
// Rule visitor that computes whether a rule carries the given metadata flag
// at a position reachable from its start.
// Rule kinds without a visit overload here fall through to the base class.
// NOTE(review): presumably that yields false, as the spec for a plain symbol
// expects - RuleFn itself is not visible here.
class HasMetadata : public rules::RuleFn<bool> {
    // The flag bits being searched for.
    rules::MetadataValue metadata_value;

public:
    // explicit: a MetadataValue should never silently convert into a visitor.
    explicit HasMetadata(rules::MetadataValue value) : metadata_value(value) {}

    // A choice has the metadata if either alternative has it.
    void visit(const rules::Choice *rule) {
        value = apply(rule->left) || apply(rule->right);
    }

    // A repetition has the metadata if its content has it.
    void visit(const rules::Repeat *rule) {
        value = apply(rule->content);
    }

    // A sequence has the metadata if its left side has it, or if the left
    // side can be blank and the right side has it.
    void visit(const rules::Seq *rule) {
        value = apply(rule->left) ||
                (rule_can_be_blank(rule->left) && apply(rule->right));
    }

    // A metadata rule matches when it shares at least one bit with the flag.
    void visit(const rules::Metadata *rule) {
        value = (rule->value & metadata_value) != 0;
    }
};
|
||||
|
||||
// Returns the result of running a HasMetadata visitor, constructed for
// `value`, over `rule`.
bool check_metadata(const rules::rule_ptr &rule, rules::MetadataValue value) {
    HasMetadata checker(value);
    return checker.apply(rule);
}
|
||||
}
|
||||
}
|
||||
13
src/compiler/build_tables/check_metadata.h
Normal file
13
src/compiler/build_tables/check_metadata.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_CHECK_METADATA_
|
||||
#define COMPILER_BUILD_TABLES_CHECK_METADATA_
|
||||
|
||||
#include "compiler/rules/rule.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
bool check_metadata(const rules::rule_ptr &rule, rules::MetadataValue value);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_CHECK_METADATA_
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
|
|
@ -34,6 +35,10 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const rules::Metadata *rule) {
|
||||
value = apply(rule->rule);
|
||||
}
|
||||
|
||||
void visit(const rules::Choice *rule) {
|
||||
value = set_union(apply(rule->left), apply(rule->right));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/build_tables/check_metadata.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -18,7 +19,11 @@ namespace tree_sitter {
|
|||
bool Item::is_done() const {
|
||||
return rule_can_be_blank(rule);
|
||||
}
|
||||
|
||||
|
||||
bool Item::has_metadata(rules::MetadataValue value) const {
|
||||
return check_metadata(rule, value);
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const LexItem &item) {
|
||||
return stream <<
|
||||
string("#<item ") <<
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
|
@ -12,23 +13,24 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
class Item {
|
||||
public:
|
||||
Item(const rules::Symbol &lhs, const rules::rule_ptr rule);
|
||||
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool is_done() const;
|
||||
bool has_metadata(rules::MetadataValue) const;
|
||||
|
||||
const rules::Symbol lhs;
|
||||
const rules::rule_ptr rule;
|
||||
rules::Symbol lhs;
|
||||
rules::rule_ptr rule;
|
||||
};
|
||||
|
||||
class LexItem : public Item {
|
||||
public:
|
||||
LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule);
|
||||
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
|
||||
bool operator==(const LexItem &other) const;
|
||||
};
|
||||
|
||||
class ParseItem : public Item {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs,
|
||||
const rules::rule_ptr rule,
|
||||
rules::rule_ptr rule,
|
||||
const size_t consumed_symbol_count,
|
||||
const rules::Symbol &lookahead_sym);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
|
|
|
|||
|
|
@ -50,24 +50,32 @@ namespace tree_sitter {
|
|||
std::function<T(T, T)> merge_fn) {
|
||||
std::map<rules::CharacterSet, T> result(left);
|
||||
for (auto &new_pair : right) {
|
||||
rules::CharacterSet new_rule = new_pair.first;
|
||||
rules::CharacterSet new_char_set = new_pair.first;
|
||||
T new_value = new_pair.second;
|
||||
|
||||
for (auto &existing_pair : left) {
|
||||
rules::CharacterSet existing_rule = existing_pair.first;
|
||||
T existing_value = existing_pair.second;
|
||||
std::map<rules::CharacterSet, T> pairs_to_insert;
|
||||
|
||||
auto iter = result.begin();
|
||||
while (iter != result.end()) {
|
||||
rules::CharacterSet char_set = iter->first;
|
||||
T value = iter->second;
|
||||
|
||||
rules::CharacterSet intersection = existing_rule.remove_set(new_rule);
|
||||
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
|
||||
if (!intersection.is_empty()) {
|
||||
result.erase(existing_pair.first);
|
||||
if (!existing_rule.is_empty())
|
||||
result.insert({ existing_rule, existing_value });
|
||||
result.insert({ intersection, merge_fn(existing_value, new_value) });
|
||||
new_rule.remove_set(intersection);
|
||||
new_char_set.remove_set(intersection);
|
||||
if (!char_set.is_empty())
|
||||
pairs_to_insert.insert({ char_set, value });
|
||||
pairs_to_insert.insert({ intersection, merge_fn(value, new_value) });
|
||||
result.erase(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
if (!new_rule.is_empty())
|
||||
result.insert({ new_rule, new_pair.second });
|
||||
|
||||
result.insert(pairs_to_insert.begin(), pairs_to_insert.end());
|
||||
|
||||
if (!new_char_set.is_empty())
|
||||
result.insert({ new_char_set, new_pair.second });
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
|
|
@ -33,6 +34,10 @@ namespace tree_sitter {
|
|||
void visit(const rules::Seq *rule) {
|
||||
value = apply(rule->left) && apply(rule->right);
|
||||
}
|
||||
|
||||
void visit(const rules::Metadata *rule) {
|
||||
value = apply(rule->rule);
|
||||
}
|
||||
};
|
||||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
|
@ -65,27 +66,32 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
void visit(const rules::Choice *rule) {
|
||||
this->value = merge_transitions<T>(this->apply(rule->left),
|
||||
this->apply(rule->right));
|
||||
auto left_transitions = this->apply(rule->left);
|
||||
auto right_transitions = this->apply(rule->right);
|
||||
this->value = merge_transitions<T>(left_transitions,
|
||||
right_transitions);
|
||||
}
|
||||
|
||||
void visit(const rules::Seq *rule) {
|
||||
auto result = map_transitions(this->apply(rule->left), [&](const rule_ptr left_rule) {
|
||||
return rules::Seq::Build({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left))
|
||||
result = merge_transitions<T>(result, this->apply(rule->right));
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
auto right_transitions = this->apply(rule->right);
|
||||
result = merge_transitions<T>(result, right_transitions);
|
||||
}
|
||||
this->value = result;
|
||||
}
|
||||
|
||||
void visit(const rules::Repeat *rule) {
|
||||
this->value = map_transitions(this->apply(rule->content), [&](const rule_ptr &value) {
|
||||
return rules::Seq::Build({
|
||||
value,
|
||||
make_shared<rules::Choice>(rule->copy(), make_shared<rules::Blank>())
|
||||
});
|
||||
return rules::Seq::Build({ value, rule->copy() });
|
||||
});
|
||||
}
|
||||
|
||||
void visit(const rules::Metadata *rule) {
|
||||
this->value = this->apply(rule->rule);
|
||||
}
|
||||
|
||||
void visit(const rules::String *rule) {
|
||||
rule_ptr result = make_shared<rules::Blank>();
|
||||
|
|
|
|||
|
|
@ -145,22 +145,24 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
string switch_on_lookahead_char(const LexState &parse_state) {
|
||||
string code_for_lex_state(const LexState &lex_state) {
|
||||
string result = "";
|
||||
auto expected_inputs = parse_state.expected_inputs();
|
||||
for (auto pair : parse_state.actions)
|
||||
auto expected_inputs = lex_state.expected_inputs();
|
||||
if (lex_state.is_token_start)
|
||||
result += "START_TOKEN();" "\n";
|
||||
for (auto pair : lex_state.actions)
|
||||
if (!pair.first.is_empty())
|
||||
result += _if(condition_for_character_rule(pair.first),
|
||||
code_for_lex_actions(pair.second, expected_inputs));
|
||||
result += code_for_lex_actions(parse_state.default_action, expected_inputs);
|
||||
result += code_for_lex_actions(lex_state.default_action, expected_inputs);
|
||||
return result;
|
||||
}
|
||||
|
||||
string switch_on_lex_state() {
|
||||
string body = "";
|
||||
for (size_t i = 0; i < lex_table.states.size(); i++)
|
||||
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
|
||||
body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
|
||||
body += _case(std::to_string(i), code_for_lex_state(lex_table.states[i]));
|
||||
body += _case("ts_lex_state_error", code_for_lex_state(lex_table.error_state));
|
||||
body += _default("LEX_PANIC();");
|
||||
return _switch("lex_state", body);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,10 @@ namespace tree_sitter {
|
|||
return "\\\"";
|
||||
case '\n':
|
||||
return "\\n";
|
||||
case '\r':
|
||||
return "\\r";
|
||||
case '\t':
|
||||
return "\\t";
|
||||
case '\\':
|
||||
return "\\\\";
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -57,19 +57,19 @@ namespace tree_sitter {
|
|||
return states.size() - 1;
|
||||
}
|
||||
|
||||
LexState & state(LexTable *table, LexStateId id) {
|
||||
LexState & LexTable::state(LexStateId id) {
|
||||
if (id < 0)
|
||||
return table->error_state;
|
||||
return error_state;
|
||||
else
|
||||
return table->states[id];
|
||||
return states[id];
|
||||
}
|
||||
|
||||
void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) {
|
||||
state(this, id).actions[match] = action;
|
||||
state(id).actions[match] = action;
|
||||
}
|
||||
|
||||
void LexTable::add_default_action(LexStateId id, LexAction action) {
|
||||
state(this, id).default_action = action;
|
||||
state(id).default_action = action;
|
||||
}
|
||||
|
||||
const LexStateId LexTable::ERROR_STATE_ID = -1;
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ namespace tree_sitter {
|
|||
std::map<rules::CharacterSet, LexAction> actions;
|
||||
LexAction default_action;
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
typedef int64_t LexStateId;
|
||||
|
|
@ -59,6 +60,7 @@ namespace tree_sitter {
|
|||
LexStateId add_state();
|
||||
void add_action(LexStateId state_id, rules::CharacterSet rule, LexAction action);
|
||||
void add_default_action(LexStateId state_id, LexAction action);
|
||||
LexState & state(LexStateId state_id);
|
||||
|
||||
std::vector<LexState> states;
|
||||
LexState error_state;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@ namespace tree_sitter {
|
|||
switch (input) {
|
||||
case '\0':
|
||||
return "<EOF>";
|
||||
case '\n':
|
||||
return "\\n";
|
||||
case '\r':
|
||||
return "\\r";
|
||||
case '\t':
|
||||
return "\\t";
|
||||
case MAX_CHAR:
|
||||
return "<MAX>";
|
||||
default:
|
||||
|
|
|
|||
34
src/compiler/rules/metadata.cc
Normal file
34
src/compiler/rules/metadata.cc
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
#include "compiler/rules/metadata.h"
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::hash;
|
||||
using std::make_shared;
|
||||
|
||||
namespace rules {
|
||||
Metadata::Metadata(rule_ptr rule, MetadataValue value) : rule(rule), value(value) {}
|
||||
|
||||
// Two rules are equal when the other one is also a Metadata, carries the same
// value, and wraps an equal rule.
bool Metadata::operator==(const Rule &rule) const {
    const Metadata *other = dynamic_cast<const Metadata *>(&rule);
    if (!other)
        return false;
    if (!(other->value == value))
        return false;
    // `this->` is needed: the parameter shadows the member named `rule`.
    return other->rule->operator==(*this->rule);
}
|
||||
|
||||
size_t Metadata::hash_code() const {
|
||||
return hash<int>()(value);
|
||||
}
|
||||
|
||||
rule_ptr Metadata::copy() const {
|
||||
return make_shared<Metadata>(rule, value);
|
||||
}
|
||||
|
||||
std::string Metadata::to_string() const {
|
||||
return "#<metadata " + rule->to_string() + ">";
|
||||
}
|
||||
|
||||
void Metadata::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
30
src/compiler/rules/metadata.h
Normal file
30
src/compiler/rules/metadata.h
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#ifndef COMPILER_RULES_METADATA_H_
|
||||
#define COMPILER_RULES_METADATA_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Bit flags that can be attached to a rule through a Metadata node.
//
// The underlying type is fixed to int because callers combine and construct
// flag values that are not named enumerators (e.g. MetadataValue(1 << 3) or
// MetadataValue(value | 1)). Without a fixed underlying type the valid range
// of this enum would only cover 0..1, and converting larger values to it
// would not be well defined.
enum MetadataValue : int {
    NONE = 0,
    START_TOKEN = 1,
};
|
||||
|
||||
class Metadata : public Rule {
|
||||
public:
|
||||
Metadata(rule_ptr rule, MetadataValue value);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
|
||||
const rule_ptr rule;
|
||||
const MetadataValue value;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // COMPILER_RULES_METADATA_H_
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
@ -183,7 +184,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
string Pattern::to_string() const {
|
||||
return string("#<pattern '") + value + "'>";
|
||||
return string("#<pattern '") + util::escape_string(value) + "'>";
|
||||
}
|
||||
|
||||
void Pattern::accept(Visitor *visitor) const {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
|
||||
|
|
@ -15,6 +16,7 @@ namespace tree_sitter {
|
|||
void Visitor::visit(const Blank *rule) { default_visit(rule); }
|
||||
void Visitor::visit(const CharacterSet *rule) { default_visit(rule); }
|
||||
void Visitor::visit(const Choice *rule) { default_visit(rule); }
|
||||
void Visitor::visit(const Metadata *rule) { default_visit(rule); }
|
||||
void Visitor::visit(const Pattern *rule) { default_visit(rule); }
|
||||
void Visitor::visit(const Repeat *rule) { default_visit(rule); }
|
||||
void Visitor::visit(const Seq *rule) { default_visit(rule); }
|
||||
|
|
@ -36,5 +38,9 @@ namespace tree_sitter {
|
|||
// Rebuilds a Repeat node around the result of applying this function to the
// repeated content, and stores the new node in `value`.
void IdentityRuleFn::visit(const Repeat *rule) {
    auto rebuilt_content = apply(rule->content);
    value = std::make_shared<Repeat>(rebuilt_content);
}
|
||||
|
||||
// Rebuilds a Metadata node: the wrapped rule is run through apply(), the
// metadata value is carried over unchanged, and the result goes in `value`.
void IdentityRuleFn::visit(const Metadata *rule) {
    auto rebuilt_rule = apply(rule->rule);
    value = std::make_shared<Metadata>(rebuilt_rule, rule->value);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -13,6 +13,7 @@ namespace tree_sitter {
|
|||
class Seq;
|
||||
class String;
|
||||
class Pattern;
|
||||
class Metadata;
|
||||
|
||||
class Visitor {
|
||||
public:
|
||||
|
|
@ -20,6 +21,7 @@ namespace tree_sitter {
|
|||
virtual void visit(const Blank *rule);
|
||||
virtual void visit(const CharacterSet *rule);
|
||||
virtual void visit(const Choice *rule);
|
||||
virtual void visit(const Metadata *rule);
|
||||
virtual void visit(const Pattern *rule);
|
||||
virtual void visit(const Repeat *rule);
|
||||
virtual void visit(const Seq *rule);
|
||||
|
|
@ -41,8 +43,9 @@ namespace tree_sitter {
|
|||
|
||||
// Rule function producing a rule_ptr. It declares a default_visit handler
// plus dedicated overloads for the composite rule types listed below.
//
// Each overload is declared exactly once: the earlier text declared
// visit(const Seq *) twice, which is an ill-formed member redeclaration.
class IdentityRuleFn : public RuleFn<rule_ptr> {
    // Handler for rule types without a dedicated overload below.
    virtual void default_visit(const Rule *rule);

    // Overloads for rule types that contain other rules.
    virtual void visit(const Choice *rule);
    virtual void visit(const Metadata *rule);
    virtual void visit(const Seq *rule);
    virtual void visit(const Repeat *rule);
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue