Fix handling of tokens consisting of separator characters

The parser is no longer hard-coded to skip whitespace. Tokens
such as newlines, whose characters overlap with the separator
characters, can now be correctly recognized.
This commit is contained in:
Max Brunsfeld 2014-04-03 19:10:09 -07:00
parent f39cb1890d
commit 1cc7e32e2d
32 changed files with 5401 additions and 4847 deletions

View file

@ -112,7 +112,7 @@ namespace tree_sitter_examples {
str("]") }) },
// Keywords
{ "_terminator", choice({ str(";"), str("\n") }) },
{ "_terminator", pattern("[;\n]") },
{ "_var", str("var") },
{ "_for", str("for") },
{ "_if", str("if") },

View file

@ -66,72 +66,113 @@ LEX_FN() {
START_LEXER();
switch (lex_state) {
case 0:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
LEX_ERROR();
case 1:
if (lookahead == ')')
ADVANCE(2);
LEX_ERROR();
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
ACCEPT_TOKEN(ts_aux_sym_token2);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ')')
ADVANCE(3);
LEX_ERROR();
case 3:
if (lookahead == ')')
ADVANCE(2);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
LEX_ERROR();
ACCEPT_TOKEN(ts_aux_sym_token2);
case 4:
ACCEPT_TOKEN(ts_aux_sym_token3);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(4);
if (lookahead == ')')
ADVANCE(3);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
LEX_ERROR();
case 5:
ACCEPT_TOKEN(ts_aux_sym_token4);
ACCEPT_TOKEN(ts_aux_sym_token3);
case 6:
if (lookahead == ')')
ADVANCE(2);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '/')
ADVANCE(8);
LEX_ERROR();
ACCEPT_TOKEN(ts_aux_sym_token4);
case 7:
ACCEPT_TOKEN(ts_aux_sym_token5);
case 8:
ACCEPT_TOKEN(ts_aux_sym_token6);
case 9:
if (lookahead == ')')
ADVANCE(2);
if (lookahead == '*')
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(7);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '/')
if (lookahead == ')')
ADVANCE(3);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '^')
ADVANCE(10);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '/')
ADVANCE(9);
LEX_ERROR();
case 8:
ACCEPT_TOKEN(ts_aux_sym_token5);
case 9:
ACCEPT_TOKEN(ts_aux_sym_token6);
case 10:
ACCEPT_TOKEN(ts_aux_sym_token7);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(10);
if (lookahead == ')')
ADVANCE(3);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '/')
ADVANCE(9);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 11:
if (lookahead == '(')
ACCEPT_TOKEN(ts_aux_sym_token7);
case 12:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(12);
if ('0' <= lookahead && lookahead <= '9')
if (lookahead == '(')
ADVANCE(13);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(14);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(15);
LEX_ERROR();
case 12:
ACCEPT_TOKEN(ts_aux_sym_token1);
case 13:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(14);
ACCEPT_TOKEN(ts_sym_number);
ACCEPT_TOKEN(ts_aux_sym_token1);
case 14:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(14);
@ -141,123 +182,227 @@ LEX_FN() {
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(16);
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_variable);
case 16:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(16);
ACCEPT_TOKEN(ts_sym_variable);
case 17:
if (lookahead == ')')
ADVANCE(2);
ADVANCE(3);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '^')
ADVANCE(10);
ADVANCE(11);
LEX_ERROR();
case 17:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(17);
if (lookahead == ')')
ADVANCE(3);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 18:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(18);
if (lookahead == ')')
ADVANCE(2);
if (lookahead == '^')
ADVANCE(10);
ADVANCE(3);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '/')
ADVANCE(9);
LEX_ERROR();
case 19:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(19);
if (lookahead == ')')
ADVANCE(2);
ADVANCE(3);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '/')
ADVANCE(8);
if (lookahead == '/')
ADVANCE(9);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 20:
if (lookahead == ')')
ADVANCE(2);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '/')
ADVANCE(8);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(20);
if (lookahead == '^')
ADVANCE(10);
ADVANCE(11);
LEX_ERROR();
case 21:
if (lookahead == '^')
ADVANCE(10);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(21);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '/')
ADVANCE(9);
LEX_ERROR();
case 22:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(22);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '/')
ADVANCE(8);
if (lookahead == '/')
ADVANCE(9);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 23:
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '/')
ADVANCE(8);
if (lookahead == '^')
ADVANCE(10);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(23);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
LEX_ERROR();
case 24:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(24);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 25:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(25);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '^')
ADVANCE(10);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '/')
ADVANCE(9);
LEX_ERROR();
case 26:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(26);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '/')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '/')
ADVANCE(9);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
case 27:
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '/')
ADVANCE(8);
if (lookahead == '^')
ADVANCE(10);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(27);
if (lookahead == '(')
ADVANCE(12);
if (lookahead == ')')
ADVANCE(2);
if (lookahead == '*')
ADVANCE(7);
if (lookahead == '+')
ADVANCE(4);
if (lookahead == '-')
ADVANCE(5);
if (lookahead == '/')
ADVANCE(8);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(13);
if (lookahead == ')')
ADVANCE(3);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '/')
ADVANCE(9);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(14);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(15);
if (lookahead == '^')
ADVANCE(10);
ADVANCE(11);
LEX_ERROR();
case ts_lex_state_error:
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(27);
if (lookahead == '(')
ADVANCE(13);
if (lookahead == ')')
ADVANCE(3);
if (lookahead == '*')
ADVANCE(8);
if (lookahead == '+')
ADVANCE(5);
if (lookahead == '-')
ADVANCE(6);
if (lookahead == '/')
ADVANCE(9);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(14);
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(15);
if (lookahead == '^')
ADVANCE(11);
LEX_ERROR();
default:
LEX_PANIC();
@ -265,140 +410,140 @@ LEX_FN() {
}
LEX_STATES = {
[0] = 11,
[1] = 24,
[2] = 11,
[0] = 12,
[1] = 23,
[2] = 12,
[3] = 0,
[4] = 22,
[5] = 11,
[4] = 21,
[5] = 12,
[6] = 0,
[7] = 21,
[8] = 11,
[7] = 20,
[8] = 12,
[9] = 0,
[10] = 0,
[11] = 11,
[12] = 3,
[13] = 11,
[14] = 1,
[15] = 19,
[16] = 11,
[17] = 1,
[18] = 18,
[19] = 11,
[20] = 1,
[21] = 1,
[22] = 11,
[23] = 6,
[24] = 11,
[25] = 3,
[26] = 17,
[27] = 11,
[28] = 3,
[29] = 3,
[30] = 11,
[31] = 9,
[32] = 11,
[33] = 6,
[34] = 6,
[35] = 11,
[36] = 1,
[37] = 6,
[38] = 1,
[39] = 6,
[40] = 9,
[41] = 3,
[42] = 11,
[43] = 1,
[44] = 9,
[45] = 1,
[46] = 3,
[47] = 3,
[48] = 17,
[49] = 11,
[50] = 1,
[51] = 17,
[52] = 11,
[53] = 3,
[54] = 1,
[55] = 1,
[56] = 1,
[57] = 18,
[58] = 11,
[59] = 1,
[60] = 18,
[61] = 11,
[62] = 1,
[63] = 20,
[64] = 11,
[65] = 19,
[66] = 19,
[67] = 11,
[68] = 1,
[69] = 19,
[70] = 19,
[71] = 20,
[72] = 1,
[73] = 11,
[74] = 1,
[75] = 20,
[76] = 11,
[77] = 1,
[78] = 1,
[11] = 12,
[12] = 4,
[13] = 12,
[14] = 2,
[15] = 18,
[16] = 12,
[17] = 2,
[18] = 17,
[19] = 12,
[20] = 2,
[21] = 2,
[22] = 12,
[23] = 7,
[24] = 12,
[25] = 4,
[26] = 16,
[27] = 12,
[28] = 4,
[29] = 4,
[30] = 12,
[31] = 10,
[32] = 12,
[33] = 7,
[34] = 7,
[35] = 12,
[36] = 2,
[37] = 7,
[38] = 2,
[39] = 7,
[40] = 10,
[41] = 4,
[42] = 12,
[43] = 2,
[44] = 10,
[45] = 2,
[46] = 4,
[47] = 4,
[48] = 16,
[49] = 12,
[50] = 2,
[51] = 16,
[52] = 12,
[53] = 4,
[54] = 2,
[55] = 2,
[56] = 2,
[57] = 17,
[58] = 12,
[59] = 2,
[60] = 17,
[61] = 12,
[62] = 2,
[63] = 19,
[64] = 12,
[65] = 18,
[66] = 18,
[67] = 12,
[68] = 2,
[69] = 18,
[70] = 18,
[71] = 19,
[72] = 2,
[73] = 12,
[74] = 2,
[75] = 19,
[76] = 12,
[77] = 2,
[78] = 2,
[79] = 0,
[80] = 0,
[81] = 21,
[82] = 11,
[83] = 1,
[84] = 21,
[85] = 11,
[81] = 20,
[82] = 12,
[83] = 2,
[84] = 20,
[85] = 12,
[86] = 0,
[87] = 23,
[88] = 11,
[89] = 22,
[90] = 22,
[91] = 11,
[92] = 1,
[93] = 22,
[94] = 22,
[95] = 23,
[87] = 22,
[88] = 12,
[89] = 21,
[90] = 21,
[91] = 12,
[92] = 2,
[93] = 21,
[94] = 21,
[95] = 22,
[96] = 0,
[97] = 11,
[98] = 1,
[99] = 23,
[100] = 11,
[97] = 12,
[98] = 2,
[99] = 22,
[100] = 12,
[101] = 0,
[102] = 26,
[103] = 11,
[104] = 24,
[105] = 25,
[106] = 11,
[107] = 24,
[108] = 24,
[109] = 11,
[110] = 1,
[111] = 24,
[112] = 24,
[113] = 25,
[114] = 11,
[115] = 1,
[116] = 25,
[117] = 11,
[118] = 24,
[119] = 27,
[120] = 11,
[121] = 26,
[122] = 26,
[123] = 11,
[124] = 1,
[125] = 26,
[102] = 25,
[103] = 12,
[104] = 23,
[105] = 24,
[106] = 12,
[107] = 23,
[108] = 23,
[109] = 12,
[110] = 2,
[111] = 23,
[112] = 23,
[113] = 24,
[114] = 12,
[115] = 2,
[116] = 24,
[117] = 12,
[118] = 23,
[119] = 26,
[120] = 12,
[121] = 25,
[122] = 25,
[123] = 12,
[124] = 2,
[125] = 25,
[126] = 0,
[127] = 26,
[127] = 25,
[128] = 0,
[129] = 27,
[130] = 24,
[131] = 11,
[132] = 1,
[133] = 27,
[129] = 26,
[130] = 23,
[131] = 12,
[132] = 2,
[133] = 26,
};
PARSE_TABLE = {
@ -420,7 +565,7 @@ PARSE_TABLE = {
[1] = {
[ts_aux_sym_token3] = SHIFT(2),
[ts_aux_sym_token4] = SHIFT(100),
[ts_builtin_sym_end] = REDUCE(ts_sym_difference, 1),
[ts_builtin_sym_end] = REDUCE(ts_sym_sum, 1),
},
[2] = {
[ts_sym__operand1] = SHIFT(3),
@ -440,7 +585,7 @@ PARSE_TABLE = {
[4] = {
[ts_aux_sym_token5] = SHIFT(5),
[ts_aux_sym_token6] = SHIFT(85),
[ts_builtin_sym_end] = REDUCE(ts_sym_product, 1),
[ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 1),
},
[5] = {
[ts_sym__operand2] = SHIFT(6),
@ -488,7 +633,7 @@ PARSE_TABLE = {
[ts_builtin_sym_error] = SHIFT(78),
},
[12] = {
[ts_aux_sym_token2] = REDUCE(ts_sym_difference, 1),
[ts_aux_sym_token2] = REDUCE(ts_sym_sum, 1),
[ts_aux_sym_token3] = SHIFT(13),
[ts_aux_sym_token4] = SHIFT(76),
},
@ -559,8 +704,8 @@ PARSE_TABLE = {
},
[23] = {
[ts_aux_sym_token2] = REDUCE(ts_sym_quotient, 1),
[ts_aux_sym_token3] = REDUCE(ts_sym_quotient, 1),
[ts_aux_sym_token4] = REDUCE(ts_sym_product, 1),
[ts_aux_sym_token3] = REDUCE(ts_sym_product, 1),
[ts_aux_sym_token4] = REDUCE(ts_sym_quotient, 1),
[ts_aux_sym_token5] = SHIFT(24),
[ts_aux_sym_token6] = SHIFT(52),
},
@ -1080,7 +1225,7 @@ PARSE_TABLE = {
},
[102] = {
[ts_aux_sym_token3] = REDUCE(ts_sym_quotient, 1),
[ts_aux_sym_token4] = REDUCE(ts_sym_quotient, 1),
[ts_aux_sym_token4] = REDUCE(ts_sym_product, 1),
[ts_aux_sym_token5] = SHIFT(103),
[ts_aux_sym_token6] = SHIFT(117),
[ts_builtin_sym_end] = REDUCE(ts_sym_quotient, 1),

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -58,68 +58,100 @@ LEX_FN() {
START_LEXER();
switch (lex_state) {
case 0:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(0);
LEX_ERROR();
case 1:
if (lookahead == ',')
ADVANCE(2);
if (lookahead == '}')
ADVANCE(3);
LEX_ERROR();
ACCEPT_TOKEN(ts_builtin_sym_end);
case 2:
ACCEPT_TOKEN(ts_aux_sym_token3);
case 3:
ACCEPT_TOKEN(ts_aux_sym_token4);
case 4:
if (lookahead == '}')
ADVANCE(3);
LEX_ERROR();
case 5:
if (lookahead == ',')
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(2);
if (lookahead == ']')
ADVANCE(6);
if (lookahead == ',')
ADVANCE(3);
if (lookahead == '}')
ADVANCE(4);
LEX_ERROR();
case 3:
ACCEPT_TOKEN(ts_aux_sym_token3);
case 4:
ACCEPT_TOKEN(ts_aux_sym_token4);
case 5:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(5);
if (lookahead == '}')
ADVANCE(4);
LEX_ERROR();
case 6:
ACCEPT_TOKEN(ts_aux_sym_token6);
case 7:
if (lookahead == ']')
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(6);
if (lookahead == ',')
ADVANCE(3);
if (lookahead == ']')
ADVANCE(7);
LEX_ERROR();
case 7:
ACCEPT_TOKEN(ts_aux_sym_token6);
case 8:
if (lookahead == '\"')
ADVANCE(9);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == '[')
ADVANCE(24);
if (lookahead == 'f')
ADVANCE(25);
if (lookahead == 'n')
ADVANCE(30);
if (lookahead == 't')
ADVANCE(34);
if (lookahead == '{')
ADVANCE(38);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(8);
if (lookahead == ']')
ADVANCE(7);
LEX_ERROR();
case 9:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(9);
if (lookahead == '\"')
ADVANCE(10);
if (lookahead == '\\')
ADVANCE(16);
if (']' <= lookahead && lookahead <= '\\')
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(15);
if (lookahead == '[')
ADVANCE(18);
if (lookahead == 'f')
ADVANCE(19);
if (lookahead == 'n')
ADVANCE(24);
if (lookahead == 't')
ADVANCE(28);
if (lookahead == '{')
ADVANCE(32);
LEX_ERROR();
case 10:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(11);
if (lookahead == '\"')
ADVANCE(12);
if (lookahead == '\\')
ADVANCE(13);
if (']' <= lookahead && lookahead <= '\\')
ADVANCE(15);
LEX_ERROR();
case 11:
if (!((lookahead == '\"') ||
@ -129,8 +161,6 @@ LEX_FN() {
ADVANCE(12);
if (lookahead == '\\')
ADVANCE(13);
if (']' <= lookahead && lookahead <= '\\')
ADVANCE(15);
LEX_ERROR();
case 12:
ACCEPT_TOKEN(ts_sym_string);
@ -140,12 +170,8 @@ LEX_FN() {
ADVANCE(11);
if (lookahead == '\"')
ADVANCE(14);
if ('#' <= lookahead && lookahead <= '\"')
ADVANCE(11);
if (lookahead == '\\')
ADVANCE(13);
if (']' <= lookahead && lookahead <= '\\')
ADVANCE(15);
LEX_ERROR();
case 14:
if (!((lookahead == '\"') ||
@ -155,69 +181,47 @@ LEX_FN() {
ADVANCE(12);
if (lookahead == '\\')
ADVANCE(13);
if (']' <= lookahead && lookahead <= '\\')
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_string);
case 15:
if (lookahead == '\"')
ADVANCE(11);
LEX_ERROR();
case 16:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(11);
if (lookahead == '\"')
ADVANCE(17);
if ('#' <= lookahead && lookahead <= '\"')
ADVANCE(10);
if (lookahead == '\\')
ADVANCE(13);
if (']' <= lookahead && lookahead <= '\\')
if (lookahead == '.')
ADVANCE(16);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_number);
case 16:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(17);
LEX_ERROR();
case 17:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(11);
if (lookahead == '\"')
ADVANCE(12);
if (lookahead == '\\')
ADVANCE(13);
if (']' <= lookahead && lookahead <= '\\')
ADVANCE(15);
ACCEPT_TOKEN(ts_sym_string);
case 18:
if (lookahead == '\"')
ADVANCE(10);
LEX_ERROR();
case 19:
if (lookahead == '.')
ADVANCE(20);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(23);
ADVANCE(17);
ACCEPT_TOKEN(ts_sym_number);
case 18:
ACCEPT_TOKEN(ts_aux_sym_token5);
case 19:
if (lookahead == 'a')
ADVANCE(20);
LEX_ERROR();
case 20:
if ('0' <= lookahead && lookahead <= '9')
if (lookahead == 'l')
ADVANCE(21);
LEX_ERROR();
case 21:
if ('0' <= lookahead && lookahead <= '9')
if (lookahead == 's')
ADVANCE(22);
ACCEPT_TOKEN(ts_sym_number);
LEX_ERROR();
case 22:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(22);
ACCEPT_TOKEN(ts_sym_number);
case 23:
if (lookahead == '.')
ADVANCE(20);
if ('0' <= lookahead && lookahead <= '9')
if (lookahead == 'e')
ADVANCE(23);
ACCEPT_TOKEN(ts_sym_number);
LEX_ERROR();
case 23:
ACCEPT_TOKEN(ts_sym_false);
case 24:
ACCEPT_TOKEN(ts_aux_sym_token5);
if (lookahead == 'u')
ADVANCE(25);
LEX_ERROR();
case 25:
if (lookahead == 'a')
if (lookahead == 'l')
ADVANCE(26);
LEX_ERROR();
case 26:
@ -225,102 +229,145 @@ LEX_FN() {
ADVANCE(27);
LEX_ERROR();
case 27:
if (lookahead == 's')
ADVANCE(28);
LEX_ERROR();
ACCEPT_TOKEN(ts_sym_null);
case 28:
if (lookahead == 'e')
if (lookahead == 'r')
ADVANCE(29);
LEX_ERROR();
case 29:
ACCEPT_TOKEN(ts_sym_false);
case 30:
if (lookahead == 'u')
ADVANCE(30);
LEX_ERROR();
case 30:
if (lookahead == 'e')
ADVANCE(31);
LEX_ERROR();
case 31:
if (lookahead == 'l')
ADVANCE(32);
LEX_ERROR();
ACCEPT_TOKEN(ts_sym_true);
case 32:
if (lookahead == 'l')
ADVANCE(33);
LEX_ERROR();
ACCEPT_TOKEN(ts_aux_sym_token1);
case 33:
ACCEPT_TOKEN(ts_sym_null);
case 34:
if (lookahead == 'r')
ADVANCE(35);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(33);
if (lookahead == ':')
ADVANCE(34);
LEX_ERROR();
case 34:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 35:
if (lookahead == 'u')
ADVANCE(36);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(35);
if (lookahead == '\"')
ADVANCE(10);
if (lookahead == '}')
ADVANCE(4);
LEX_ERROR();
case 36:
if (lookahead == 'e')
ADVANCE(37);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(36);
if (lookahead == '\"')
ADVANCE(10);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(15);
if (lookahead == '[')
ADVANCE(18);
if (lookahead == ']')
ADVANCE(7);
if (lookahead == 'f')
ADVANCE(19);
if (lookahead == 'n')
ADVANCE(24);
if (lookahead == 't')
ADVANCE(28);
if (lookahead == '{')
ADVANCE(32);
LEX_ERROR();
case 37:
ACCEPT_TOKEN(ts_sym_true);
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(37);
if (lookahead == '\"')
ADVANCE(10);
LEX_ERROR();
case 38:
ACCEPT_TOKEN(ts_aux_sym_token1);
case 39:
if (lookahead == ':')
ADVANCE(40);
LEX_ERROR();
case 40:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 41:
if (lookahead == '\"')
ADVANCE(9);
if (lookahead == '}')
ADVANCE(3);
LEX_ERROR();
case 42:
if (lookahead == '\"')
ADVANCE(9);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == '[')
ADVANCE(24);
if (lookahead == ']')
ADVANCE(6);
if (lookahead == 'f')
ADVANCE(25);
if (lookahead == 'n')
ADVANCE(30);
if (lookahead == 't')
ADVANCE(34);
if (lookahead == '{')
START_TOKEN();
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(38);
LEX_ERROR();
case 43:
if (lookahead == '\"')
ADVANCE(9);
ADVANCE(10);
if (lookahead == ',')
ADVANCE(3);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(15);
if (lookahead == ':')
ADVANCE(34);
if (lookahead == '[')
ADVANCE(18);
if (lookahead == ']')
ADVANCE(7);
if (lookahead == 'f')
ADVANCE(19);
if (lookahead == 'n')
ADVANCE(24);
if (lookahead == 't')
ADVANCE(28);
if (lookahead == '{')
ADVANCE(32);
if (lookahead == '}')
ADVANCE(4);
LEX_ERROR();
case ts_lex_state_error:
if (lookahead == '\"')
ADVANCE(9);
if (lookahead == ',')
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(19);
if (lookahead == ':')
ADVANCE(40);
if (lookahead == '[')
ADVANCE(24);
if (lookahead == ']')
ADVANCE(6);
if (lookahead == 'f')
ADVANCE(25);
if (lookahead == 'n')
ADVANCE(30);
if (lookahead == 't')
ADVANCE(34);
if (lookahead == '{')
if (lookahead == '\0')
ADVANCE(1);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(38);
if (lookahead == '}')
if (lookahead == '\"')
ADVANCE(10);
if (lookahead == ',')
ADVANCE(3);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(15);
if (lookahead == ':')
ADVANCE(34);
if (lookahead == '[')
ADVANCE(18);
if (lookahead == ']')
ADVANCE(7);
if (lookahead == 'f')
ADVANCE(19);
if (lookahead == 'n')
ADVANCE(24);
if (lookahead == 't')
ADVANCE(28);
if (lookahead == '{')
ADVANCE(32);
if (lookahead == '}')
ADVANCE(4);
LEX_ERROR();
default:
LEX_PANIC();
@ -328,64 +375,64 @@ LEX_FN() {
}
LEX_STATES = {
[0] = 8,
[0] = 9,
[1] = 0,
[2] = 0,
[3] = 41,
[4] = 39,
[5] = 8,
[6] = 1,
[7] = 1,
[8] = 4,
[3] = 35,
[4] = 33,
[5] = 9,
[6] = 2,
[7] = 2,
[8] = 5,
[9] = 0,
[10] = 43,
[11] = 39,
[12] = 8,
[13] = 1,
[14] = 4,
[15] = 41,
[16] = 39,
[17] = 8,
[18] = 1,
[19] = 4,
[20] = 1,
[21] = 42,
[22] = 5,
[23] = 5,
[24] = 7,
[25] = 1,
[26] = 8,
[27] = 5,
[28] = 7,
[29] = 41,
[30] = 39,
[31] = 8,
[32] = 1,
[33] = 4,
[34] = 5,
[35] = 5,
[36] = 1,
[37] = 4,
[38] = 5,
[39] = 42,
[40] = 5,
[41] = 7,
[42] = 5,
[43] = 5,
[44] = 1,
[45] = 1,
[46] = 1,
[47] = 4,
[48] = 1,
[49] = 1,
[50] = 4,
[10] = 37,
[11] = 33,
[12] = 9,
[13] = 2,
[14] = 5,
[15] = 35,
[16] = 33,
[17] = 9,
[18] = 2,
[19] = 5,
[20] = 2,
[21] = 36,
[22] = 6,
[23] = 6,
[24] = 8,
[25] = 2,
[26] = 9,
[27] = 6,
[28] = 8,
[29] = 35,
[30] = 33,
[31] = 9,
[32] = 2,
[33] = 5,
[34] = 6,
[35] = 6,
[36] = 2,
[37] = 5,
[38] = 6,
[39] = 36,
[40] = 6,
[41] = 8,
[42] = 6,
[43] = 6,
[44] = 2,
[45] = 2,
[46] = 2,
[47] = 5,
[48] = 2,
[49] = 2,
[50] = 5,
[51] = 0,
[52] = 1,
[53] = 4,
[52] = 2,
[53] = 5,
[54] = 0,
[55] = 42,
[56] = 5,
[57] = 7,
[55] = 36,
[56] = 6,
[57] = 8,
[58] = 0,
[59] = 0,
};

View file

@ -51,10 +51,11 @@ static ts_tree * ts_lex(ts_lexer *lexer, state_id lex_state)
#define START_LEXER() \
char lookahead; \
ts_lexer_skip_whitespace(lexer); \
if (!ts_lexer_lookahead_char(lexer)) return ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \
next_state: \
lookahead = ts_lexer_lookahead_char(lexer);
/* Marks the beginning of a token by calling ts_lexer_start_token on the
   `lexer` variable that is in scope at the expansion site. */
#define START_TOKEN() \
ts_lexer_start_token(lexer);
/* Advances the lexer, sets `lex_state` to `state_index`, and jumps to the
   `next_state` label so lexing continues from the new state. */
#define ADVANCE(state_index) \
{ ts_lexer_advance(lexer); lex_state = state_index; goto next_state; }
@ -73,7 +74,7 @@ static const ts_parse_action ts_parse_actions[ts_state_count][ts_symbol_count]
#define EXPORT_PARSER(constructor_name) \
ts_parser constructor_name() { \
return (ts_parser){ \
return (ts_parser) { \
.parse_fn = ts_parse, \
.symbol_names = ts_symbol_names, \
.data = ts_lr_parser_make(ts_symbol_count, (const ts_parse_action *)ts_parse_actions, ts_lex_states, hidden_symbol_flags), \
@ -161,6 +162,10 @@ static void ts_lexer_advance(ts_lexer *lexer) {
}
}
/* Stores the lexer's current position as the start position of the token
   that is about to be lexed. */
static void ts_lexer_start_token(ts_lexer *lexer) {
    const size_t start_position = ts_lexer_position(lexer);
    lexer->token_start_position = start_position;
}
static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
size_t current_position = ts_lexer_position(lexer);
size_t size = current_position - lexer->token_start_position;
@ -169,12 +174,6 @@ static ts_tree * ts_lexer_build_node(ts_lexer *lexer, ts_symbol symbol) {
return ts_tree_make_leaf(symbol, size, offset);
}
/*
 * Advances the lexer past any run of whitespace characters, then records the
 * resulting position as the start of the next token.
 */
static void ts_lexer_skip_whitespace(ts_lexer *lexer) {
    /* isspace() is only defined for EOF and values representable as
       unsigned char; passing a negative plain char is undefined behavior,
       so convert the lookahead first.
       NOTE(review): assumes the lookahead is a single byte (it is stored in
       a `char` by the lexer macros) -- confirm. */
    while (isspace((unsigned char)ts_lexer_lookahead_char(lexer)))
        ts_lexer_advance(lexer);
    lexer->token_start_position = ts_lexer_position(lexer);
}
static const state_id ts_lex_state_error = -1;

View file

@ -1,80 +0,0 @@
#include "compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/build_tables.h"
#include <functional>
using namespace rules;
using build_tables::build_tables;
// Returns the set of symbols that appear as keys in the given
// symbol -> parse action map.
static set<Symbol> keys(const map<Symbol, ParseAction> &map) {
    set<Symbol> result;
    // Bind each entry by const reference; iterating by value would copy the
    // whole (Symbol, ParseAction) pair on every step.
    for (const auto &entry : map) {
        result.insert(entry.first);
    }
    return result;
}
START_TEST
// Specs for the parse and lex tables built from a small arithmetic grammar.
describe("building parse and lex tables", []() {
    // Syntactic grammar: an expression is a single term, or two terms
    // joined by a "plus" token.
    PreparedGrammar grammar({
        { "expression", choice({
            seq({ sym("term"), sym("plus"), sym("term") }),
            sym("term") }) },
        { "term", choice({
            sym("variable"),
            sym("number"),
            seq({ sym("left-paren"), sym("expression"), sym("right-paren") }) }) }
    }, {});

    // Lexical grammar: definitions of the tokens referenced above.
    PreparedGrammar lex_grammar({
        { "plus", str("+") },
        { "variable", pattern("\\a+") },
        { "number", pattern("\\d+") },
        { "left-paren", str("(") },
        { "right-paren", str(")") }
    }, {});

    ParseTable table;
    LexTable lex_table;

    // Rebuild both tables before every spec.
    before_each([&]() {
        auto built = build_tables::build_tables(grammar, lex_grammar);
        table = built.first;
        lex_table = built.second;
    });

    // Looks up a parse state by its index in the parse table.
    auto parse_state = [&](size_t index) -> ParseState {
        return table.states[index];
    };

    // Looks up the lex state associated with the given parse state.
    auto lex_state = [&](size_t parse_state_index) -> LexState {
        long lex_index = table.states[parse_state_index].lex_state_id;
        return lex_table.states[lex_index];
    };

    it("has the right starting state", [&]() {
        const set<Symbol> expected_symbols({
            Symbol("expression"),
            Symbol("term"),
            Symbol("number"),
            Symbol("variable"),
            Symbol("left-paren"),
        });
        AssertThat(keys(parse_state(0).actions), Equals(expected_symbols));

        const set<CharacterSet> expected_inputs({
            CharacterSet({ '(' }),
            CharacterSet({ {'0', '9'} }),
            CharacterSet({ {'a', 'z'}, {'A', 'Z'} }),
        });
        AssertThat(lex_state(0).expected_inputs(), Equals(expected_inputs));
    });
});
END_TEST

View file

@ -0,0 +1,60 @@
#include "compiler_spec_helper.h"
#include "compiler/rules/metadata.h"
#include "compiler/build_tables/check_metadata.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Specs for check_metadata(rule, value): each case builds a rule and asserts
// whether check_metadata reports the queried metadata value for it.
describe("checking if rules have metadata", []() {
    // The metadata value every spec below queries for.
    MetadataValue value = MetadataValue(1 << 3);

    it("returns true for a compatible metadata rule", [&]() {
        auto rule = make_shared<Metadata>(sym("x"), MetadataValue(value | 1));
        AssertThat(check_metadata(rule, value), IsTrue());
    });

    it("returns false for an incompatible metadata rule", [&]() {
        auto rule = make_shared<Metadata>(sym("x"), MetadataValue(1 << 2));
        AssertThat(check_metadata(rule, value), IsFalse());
    });

    it("returns false for a non-metadata rule", [&]() {
        auto rule = sym("x");
        AssertThat(check_metadata(rule, value), IsFalse());
    });

    it("returns true for a compatible metadata rule preceded by rules that can be blank", [&]() {
        auto rule = seq({
            repeat(sym("x")),
            make_shared<Metadata>(sym("x"), MetadataValue(value | 1)),
        });
        AssertThat(check_metadata(rule, value), IsTrue());
    });

    it("returns true for a choice including a compatible metadata rule", [&]() {
        auto rule = choice({
            sym("x"),
            make_shared<Metadata>(sym("x"), MetadataValue(value | 1)),
        });
        AssertThat(check_metadata(rule, value), IsTrue());
    });

    it("returns true for a repetition containing a compatible metadata rule", [&]() {
        auto rule = repeat(make_shared<Metadata>(sym("x"), MetadataValue(value | 1)));
        AssertThat(check_metadata(rule, value), IsTrue());
    });

    // The description previously said "returns true" although the spec
    // asserts IsFalse(); the text now matches the assertion.
    it("returns false for a metadata rule preceded by rules that cannot be blank", [&]() {
        auto rule = seq({
            sym("x"),
            make_shared<Metadata>(sym("x"), MetadataValue(value | 1)),
        });
        AssertThat(check_metadata(rule, value), IsFalse());
    });
});
END_TEST

View file

@ -1,6 +1,7 @@
#include "compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/first_set.h"
#include "compiler/rules/metadata.h"
using std::set;
using namespace build_tables;
@ -83,6 +84,14 @@ describe("computing FIRST sets", []() {
})));
});
});
it("ignores metadata rules", [&]() {
auto rule = make_shared<Metadata>(sym("x"), MetadataValue(1));
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
Symbol("x"),
})));
});
});
END_TEST

View file

@ -0,0 +1,37 @@
#include "compiler_spec_helper.h"
#include "compiler/build_tables/merge_transitions.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Specs for merge_char_transitions on maps keyed by CharacterSet.
describe("merging character set transitions", []() {
    using int_map = map<CharacterSet, int>;

    // Combines the values of two overlapping transitions by adding them.
    auto add_values = [](int left, int right) -> int {
        return left + right;
    };

    describe("when two of the right transitions intersect one of the left transitions", [&]() {
        it("splits the left-hand transition correctly", [&]() {
            int_map left_transitions({
                { CharacterSet({ 'a', 'c' }), 1 },
            });
            int_map right_transitions({
                { CharacterSet({ 'a' }), 2 },
                { CharacterSet({ 'c' }), 4 },
            });

            // The single {a, c} entry is split so that each character
            // carries the combined value of both sides.
            AssertThat(merge_char_transitions<int>(left_transitions, right_transitions, add_values), Equals(int_map({
                { CharacterSet({ 'a' }), 3 },
                { CharacterSet({ 'c' }), 5 },
            })));

            // Merging in the opposite order must give the same result.
            AssertThat(merge_char_transitions<int>(right_transitions, left_transitions, add_values), Equals(merge_char_transitions<int>(left_transitions, right_transitions, add_values)));
        });
    });
});
END_TEST

View file

@ -1,5 +1,6 @@
#include "compiler_spec_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
@ -45,6 +46,14 @@ describe("checking if rules can be blank", [&]() {
rule = seq({ blank(), choice({ sym("x"), blank() }) });
AssertThat(rule_can_be_blank(rule), IsTrue());
});
it("ignores metadata rules", [&]() {
rule = make_shared<rules::Metadata>(blank(), rules::MetadataValue(0));
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = make_shared<rules::Metadata>(sym("one"), rules::MetadataValue(0));
AssertThat(rule_can_be_blank(rule), IsFalse());
});
describe("checking recursively (by expanding non-terminals)", [&]() {
PreparedGrammar grammar({

View file

@ -171,10 +171,7 @@ describe("rule transitions", []() {
CharacterSet({ 'a' }),
seq({
character({ 'b' }),
choice({
rule,
blank()
})
rule,
})
}})));
@ -182,13 +179,8 @@ describe("rule transitions", []() {
AssertThat(
char_transitions(rule),
Equals(rule_map<CharacterSet>({
{
CharacterSet({ 'a' }),
choice({
rule,
blank()
})
}})));
{ CharacterSet({ 'a' }), rule }
})));
});
describe("regression tests (somewhat redundant, should maybe be deleted later)", []() {
@ -203,10 +195,7 @@ describe("rule transitions", []() {
AssertThat(char_transitions(rule), Equals(rule_map<CharacterSet>({
{ CharacterSet({ '"' }).complement(), seq({
choice({
repeat(character({ '"' }, false)),
blank(),
}),
repeat(character({ '"' }, false)),
character({ '"' }), }) },
{ CharacterSet({ '"' }), blank() },
})));

View file

@ -1,7 +1,7 @@
==========================================
parses multiple statements
==========================================
var x = {};
var x = {}
firstFunction(x);
secondFunction(x);
---

View file

@ -4,11 +4,16 @@
#include <unordered_map>
#include "compiler/prepared_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/seq.h"
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/first_set.h"
#include "stream_methods.h"
namespace tree_sitter {
using std::pair;
using std::string;
@ -48,13 +53,20 @@ namespace tree_sitter {
}
// Registers an Advance action on `state_id` for every character transition
// out of `item_set`. The item set reached by each transition is handed to
// add_lex_state, and the id it returns becomes the target of the action.
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (auto transition : char_transitions(item_set, grammar)) {
auto transitions = char_transitions(item_set, grammar);
for (auto transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet item_set = transition.second;
LexStateId new_state_id = add_lex_state(item_set);
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
}
}
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
    // Flag the lex state as a token start when any of its items carries the
    // START_TOKEN metadata.
    // NOTE(review): the flag is only ever set to true here, so this relies on
    // LexState starting out with is_token_start == false — confirm.
    for (auto &lex_item : item_set) {
        if (!lex_item.has_metadata(rules::START_TOKEN))
            continue;
        lex_table.state(state_id).is_token_start = true;
    }
}
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
for (LexItem item : item_set) {
@ -80,23 +92,35 @@ namespace tree_sitter {
}
}
}
rules::rule_ptr after_separators(rules::rule_ptr rule) {
    // Any run of separator characters (space, tab, newline, carriage return)
    // may precede the token.
    rules::rule_ptr separators =
        make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy());

    // The token itself is tagged START_TOKEN so the point where the
    // separators end can be found again later.
    rules::rule_ptr token = make_shared<rules::Metadata>(rule, rules::START_TOKEN);

    return rules::Seq::Build({ separators, token });
}
LexItemSet lex_item_set_for_parse_state(const ParseState &state) {
    // One lex item per expected input that the lexical grammar defines; each
    // rule is wrapped by after_separators.
    LexItemSet items;
    for (auto &symbol : state.expected_inputs()) {
        if (!lex_grammar.has_definition(symbol))
            continue;
        items.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
    }

    // An END_OF_INPUT item matching character 0 is always added.
    items.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
    return items;
}
// Gives the parse state `state_id` a lex state: the id returned by
// add_lex_state is stored in that parse state's lex_state_id.
void assign_lex_state(ParseStateId state_id) {
ParseState &state = parse_table.states[state_id];
LexItemSet item_set;
for (auto &symbol : state.expected_inputs()) {
if (lex_grammar.has_definition(symbol))
item_set.insert(LexItem(symbol, lex_grammar.rule(symbol)));
}
state.lex_state_id = add_lex_state(item_set);
state.lex_state_id = add_lex_state(lex_item_set_for_parse_state(state));
}
LexStateId add_lex_state(const LexItemSet &item_set) {
auto state_id = lex_state_id_for_item_set(item_set);
if (state_id == NOT_FOUND) {
state_id = lex_table.add_state();
lex_state_ids[item_set] = state_id;
add_token_start(item_set, state_id);
add_advance_actions(item_set, state_id);
add_accept_token_actions(item_set, state_id);
}
@ -119,13 +143,14 @@ namespace tree_sitter {
// Populates the lexer's error state (LexTable::ERROR_STATE_ID). The item set
// holds one item per entry of lex_grammar.rules and lex_grammar.aux_rules,
// plus an END_OF_INPUT item; advance and accept actions are then added for it.
// NOTE(review): unlike add_lex_state, this never calls add_token_start for
// the error state, although items built with after_separators carry
// START_TOKEN metadata — confirm whether the error state should be flagged.
void add_error_lex_state() {
LexItemSet error_item_set;
for (auto &pair : lex_grammar.rules) {
LexItem item(Symbol(pair.first, rules::SymbolTypeNormal), pair.second);
LexItem item(Symbol(pair.first, rules::SymbolTypeNormal), after_separators(pair.second));
error_item_set.insert(item);
}
for (auto &pair : lex_grammar.aux_rules) {
LexItem item(Symbol(pair.first, rules::SymbolTypeAuxiliary), pair.second);
LexItem item(Symbol(pair.first, rules::SymbolTypeAuxiliary), after_separators(pair.second));
error_item_set.insert(item);
}
error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
}

View file

@ -0,0 +1,39 @@
#include "check_metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/build_tables/rule_can_be_blank.h"
namespace tree_sitter {
    namespace build_tables {
        // Rule visitor that answers: "is the given metadata value attached to
        // this rule at a position that can be reached before any input has
        // been consumed?"
        //
        // Rule types without an override below are left to RuleFn<bool>'s
        // default handling (defined elsewhere).
        class HasMetadata : public rules::RuleFn<bool> {
            // The metadata bit(s) being searched for.
            rules::MetadataValue metadata_value;

        public:
            explicit HasMetadata(rules::MetadataValue value) : metadata_value(value) {}

            // Either alternative may supply the metadata.
            void visit(const rules::Choice *rule) {
                value = apply(rule->left) || apply(rule->right);
            }

            // A repeat starts with its content.
            void visit(const rules::Repeat *rule) {
                value = apply(rule->content);
            }

            // The right-hand side only counts when the left-hand side can be
            // blank, i.e. when the right-hand side can also be at the start.
            void visit(const rules::Seq *rule) {
                bool result = apply(rule->left);
                if (!result && rule_can_be_blank(rule->left))
                    result = apply(rule->right);
                value = result;
            }

            // A metadata node matches when it carries the requested bit(s).
            // Otherwise it is treated as transparent, so a matching node
            // nested inside a non-matching one is still found.
            void visit(const rules::Metadata *rule) {
                value = ((rule->value & metadata_value) != 0) || apply(rule->rule);
            }
        };

        // Returns true when `value` is attached to `rule` at its start, as
        // determined by HasMetadata.
        bool check_metadata(const rules::rule_ptr &rule, rules::MetadataValue value) {
            return HasMetadata(value).apply(rule);
        }
    }
}

View file

@ -0,0 +1,13 @@
#ifndef COMPILER_BUILD_TABLES_CHECK_METADATA_
#define COMPILER_BUILD_TABLES_CHECK_METADATA_
#include "compiler/rules/rule.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
// Reports whether the metadata `value` is attached to `rule`.
bool check_metadata(const rules::rule_ptr &rule, rules::MetadataValue value);
}
}
#endif // COMPILER_BUILD_TABLES_CHECK_METADATA_

View file

@ -2,6 +2,7 @@
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
@ -34,6 +35,10 @@ namespace tree_sitter {
}
}
}
// Metadata is transparent here: the result is that of the wrapped rule.
void visit(const rules::Metadata *rule) {
value = apply(rule->rule);
}
void visit(const rules::Choice *rule) {
value = set_union(apply(rule->left), apply(rule->right));

View file

@ -1,5 +1,6 @@
#include "compiler/build_tables/item.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/build_tables/check_metadata.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
@ -18,7 +19,11 @@ namespace tree_sitter {
// An item is done when its remaining rule can be blank.
bool Item::is_done() const {
return rule_can_be_blank(rule);
}
// Forwards to check_metadata for this item's rule.
bool Item::has_metadata(rules::MetadataValue value) const {
return check_metadata(rule, value);
}
ostream& operator<<(ostream &stream, const LexItem &item) {
return stream <<
string("#<item ") <<

View file

@ -5,6 +5,7 @@
#include <string>
#include <vector>
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
class Grammar;
@ -12,23 +13,24 @@ namespace tree_sitter {
namespace build_tables {
// Pairs a symbol (`lhs`) with a rule.
class Item {
public:
Item(const rules::Symbol &lhs, const rules::rule_ptr rule);
Item(const rules::Symbol &lhs, rules::rule_ptr rule);
// True when `rule` can be blank.
bool is_done() const;
// True when check_metadata reports the given value for `rule`.
bool has_metadata(rules::MetadataValue) const;
const rules::Symbol lhs;
const rules::rule_ptr rule;
rules::Symbol lhs;
rules::rule_ptr rule;
};
// Item subclass that adds equality comparison between LexItems.
class LexItem : public Item {
public:
LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule);
LexItem(const rules::Symbol &lhs, rules::rule_ptr rule);
bool operator==(const LexItem &other) const;
};
class ParseItem : public Item {
public:
ParseItem(const rules::Symbol &lhs,
const rules::rule_ptr rule,
rules::rule_ptr rule,
const size_t consumed_symbol_count,
const rules::Symbol &lookahead_sym);
bool operator==(const ParseItem &other) const;

View file

@ -50,24 +50,32 @@ namespace tree_sitter {
std::function<T(T, T)> merge_fn) {
std::map<rules::CharacterSet, T> result(left);
for (auto &new_pair : right) {
rules::CharacterSet new_rule = new_pair.first;
rules::CharacterSet new_char_set = new_pair.first;
T new_value = new_pair.second;
for (auto &existing_pair : left) {
rules::CharacterSet existing_rule = existing_pair.first;
T existing_value = existing_pair.second;
std::map<rules::CharacterSet, T> pairs_to_insert;
auto iter = result.begin();
while (iter != result.end()) {
rules::CharacterSet char_set = iter->first;
T value = iter->second;
rules::CharacterSet intersection = existing_rule.remove_set(new_rule);
rules::CharacterSet intersection = char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
result.erase(existing_pair.first);
if (!existing_rule.is_empty())
result.insert({ existing_rule, existing_value });
result.insert({ intersection, merge_fn(existing_value, new_value) });
new_rule.remove_set(intersection);
new_char_set.remove_set(intersection);
if (!char_set.is_empty())
pairs_to_insert.insert({ char_set, value });
pairs_to_insert.insert({ intersection, merge_fn(value, new_value) });
result.erase(iter++);
} else {
++iter;
}
}
if (!new_rule.is_empty())
result.insert({ new_rule, new_pair.second });
result.insert(pairs_to_insert.begin(), pairs_to_insert.end());
if (!new_char_set.is_empty())
result.insert({ new_char_set, new_pair.second });
}
return result;
}

View file

@ -7,6 +7,7 @@
#include "compiler/rules/seq.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
using std::set;
@ -33,6 +34,10 @@ namespace tree_sitter {
void visit(const rules::Seq *rule) {
    // A sequence can be blank only when both of its halves can be blank; the
    // right half is not examined once the left half is known not to be.
    bool left_is_blank = apply(rule->left);
    value = left_is_blank && apply(rule->right);
}
// A metadata node can be blank exactly when the rule it wraps can be.
void visit(const rules::Metadata *rule) {
value = apply(rule->rule);
}
};
class CanBeBlankRecursive : public CanBeBlank {

View file

@ -7,6 +7,7 @@
#include "compiler/rules/seq.h"
#include "compiler/rules/string.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/visitor.h"
@ -65,27 +66,32 @@ namespace tree_sitter {
}
// The transitions of a choice are the merged transitions of its two
// alternatives. Holding each side in a named local means the left side is
// visited before the right side.
void visit(const rules::Choice *rule) {
this->value = merge_transitions<T>(this->apply(rule->left),
this->apply(rule->right));
auto left_transitions = this->apply(rule->left);
auto right_transitions = this->apply(rule->right);
this->value = merge_transitions<T>(left_transitions,
right_transitions);
}
// Each transition out of the left side continues with what remains of the
// left side followed by the right side. When the left side can be blank, the
// right side's own transitions are merged in as well.
void visit(const rules::Seq *rule) {
auto result = map_transitions(this->apply(rule->left), [&](const rule_ptr left_rule) {
return rules::Seq::Build({ left_rule, rule->right });
});
if (rule_can_be_blank(rule->left))
result = merge_transitions<T>(result, this->apply(rule->right));
if (rule_can_be_blank(rule->left)) {
auto right_transitions = this->apply(rule->right);
result = merge_transitions<T>(result, right_transitions);
}
this->value = result;
}
// Each transition out of the repeated content continues with what remains of
// that content followed by a copy of the repeat (rule->copy()).
void visit(const rules::Repeat *rule) {
this->value = map_transitions(this->apply(rule->content), [&](const rule_ptr &value) {
return rules::Seq::Build({
value,
make_shared<rules::Choice>(rule->copy(), make_shared<rules::Blank>())
});
return rules::Seq::Build({ value, rule->copy() });
});
}
// Metadata is transparent here: the transitions are those of the wrapped rule.
void visit(const rules::Metadata *rule) {
this->value = this->apply(rule->rule);
}
void visit(const rules::String *rule) {
rule_ptr result = make_shared<rules::Blank>();

View file

@ -145,22 +145,24 @@ namespace tree_sitter {
}
}
// Generates the code for one lex state: a START_TOKEN(); line when the state
// is flagged is_token_start, an `if` for every non-empty character set in the
// state's actions, and finally the code for the state's default action.
string switch_on_lookahead_char(const LexState &parse_state) {
string code_for_lex_state(const LexState &lex_state) {
string result = "";
auto expected_inputs = parse_state.expected_inputs();
for (auto pair : parse_state.actions)
auto expected_inputs = lex_state.expected_inputs();
if (lex_state.is_token_start)
result += "START_TOKEN();" "\n";
for (auto pair : lex_state.actions)
if (!pair.first.is_empty())
result += _if(condition_for_character_rule(pair.first),
code_for_lex_actions(pair.second, expected_inputs));
result += code_for_lex_actions(parse_state.default_action, expected_inputs);
result += code_for_lex_actions(lex_state.default_action, expected_inputs);
return result;
}
// Builds the `switch (lex_state)` statement: one case per entry of
// lex_table.states keyed by its index, a `ts_lex_state_error` case for the
// error state, and a default branch that emits LEX_PANIC();.
string switch_on_lex_state() {
string body = "";
for (size_t i = 0; i < lex_table.states.size(); i++)
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
body += _case(std::to_string(i), code_for_lex_state(lex_table.states[i]));
body += _case("ts_lex_state_error", code_for_lex_state(lex_table.error_state));
body += _default("LEX_PANIC();");
return _switch("lex_state", body);
}

View file

@ -35,6 +35,10 @@ namespace tree_sitter {
return "\\\"";
case '\n':
return "\\n";
case '\r':
return "\\r";
case '\t':
return "\\t";
case '\\':
return "\\\\";
default:

View file

@ -57,19 +57,19 @@ namespace tree_sitter {
return states.size() - 1;
}
// Looks up a lex state by id. A negative id selects error_state; any other
// id indexes into `states` (no bounds check is performed here).
LexState & state(LexTable *table, LexStateId id) {
LexState & LexTable::state(LexStateId id) {
if (id < 0)
return table->error_state;
return error_state;
else
return table->states[id];
return states[id];
}
// Sets the action for character set `match` in state `id`, replacing any
// action already stored for that exact set.
void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) {
state(this, id).actions[match] = action;
state(id).actions[match] = action;
}
// Sets the default action of state `id`.
void LexTable::add_default_action(LexStateId id, LexAction action) {
state(this, id).default_action = action;
state(id).default_action = action;
}
const LexStateId LexTable::ERROR_STATE_ID = -1;

View file

@ -49,6 +49,7 @@ namespace tree_sitter {
std::map<rules::CharacterSet, LexAction> actions;
LexAction default_action;
std::set<rules::CharacterSet> expected_inputs() const;
bool is_token_start;
};
typedef int64_t LexStateId;
@ -59,6 +60,7 @@ namespace tree_sitter {
LexStateId add_state();
void add_action(LexStateId state_id, rules::CharacterSet rule, LexAction action);
void add_default_action(LexStateId state_id, LexAction action);
LexState & state(LexStateId state_id);
std::vector<LexState> states;
LexState error_state;

View file

@ -26,6 +26,12 @@ namespace tree_sitter {
switch (input) {
case '\0':
return "<EOF>";
case '\n':
return "\\n";
case '\r':
return "\\r";
case '\t':
return "\\t";
case MAX_CHAR:
return "<MAX>";
default:

View file

@ -0,0 +1,34 @@
#include "compiler/rules/metadata.h"
#include <string>
#include "compiler/rules/visitor.h"
#include <map>
namespace tree_sitter {
    using std::hash;
    using std::make_shared;

    namespace rules {
        // Wraps `rule` and tags it with `value`.
        Metadata::Metadata(rule_ptr rule, MetadataValue value) : rule(rule), value(value) {}

        // Two metadata nodes are equal when both the tag and the wrapped
        // rules are equal; any other kind of rule compares unequal.
        bool Metadata::operator==(const Rule &rule) const {
            auto other = dynamic_cast<const Metadata *>(&rule);
            return other && other->value == value && other->rule->operator==(*this->rule);
        }

        // Mixes the wrapped rule's hash into the tag's hash. Equality also
        // compares the wrapped rule, so hashing the tag alone would put every
        // node with the same tag into the same bucket.
        size_t Metadata::hash_code() const {
            size_t result = hash<int>()(value);
            result ^= rule->hash_code() + 0x9e3779b9 + (result << 6) + (result >> 2);
            return result;
        }

        // Returns a new node with the same tag that shares the wrapped rule.
        rule_ptr Metadata::copy() const {
            return make_shared<Metadata>(rule, value);
        }

        // Debug representation; shows the wrapped rule but not the tag.
        std::string Metadata::to_string() const {
            return "#<metadata " + rule->to_string() + ">";
        }

        // Dispatches to the visitor's overload for Metadata.
        void Metadata::accept(Visitor *visitor) const {
            visitor->visit(this);
        }
    }
}

View file

@ -0,0 +1,30 @@
#ifndef COMPILER_RULES_METADATA_H_
#define COMPILER_RULES_METADATA_H_
#include <string>
#include "compiler/rules/rule.h"
namespace tree_sitter {
namespace rules {
// Tags that a Metadata node can attach to the rule it wraps.
typedef enum {
NONE = 0,
START_TOKEN = 1,
} MetadataValue;
// Rule node that wraps another rule and tags it with a MetadataValue.
class Metadata : public Rule {
public:
Metadata(rule_ptr rule, MetadataValue value);
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor *visitor) const;
// The wrapped rule.
const rule_ptr rule;
// The tag attached to `rule`.
const MetadataValue value;
};
}
}
#endif // COMPILER_RULES_METADATA_H_

View file

@ -7,6 +7,7 @@
#include "compiler/rules/repeat.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/blank.h"
#include "compiler/util/string_helpers.h"
namespace tree_sitter {
namespace rules {
@ -183,7 +184,7 @@ namespace tree_sitter {
}
// Renders the pattern as `#<pattern '...'>`.
string Pattern::to_string() const {
return string("#<pattern '") + value + "'>";
return string("#<pattern '") + util::escape_string(value) + "'>";
}
void Pattern::accept(Visitor *visitor) const {

View file

@ -6,6 +6,7 @@
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/repeat.h"
@ -15,6 +16,7 @@ namespace tree_sitter {
// Each of these rule-type overloads forwards to default_visit.
void Visitor::visit(const Blank *rule) { default_visit(rule); }
void Visitor::visit(const CharacterSet *rule) { default_visit(rule); }
void Visitor::visit(const Choice *rule) { default_visit(rule); }
void Visitor::visit(const Metadata *rule) { default_visit(rule); }
void Visitor::visit(const Pattern *rule) { default_visit(rule); }
void Visitor::visit(const Repeat *rule) { default_visit(rule); }
void Visitor::visit(const Seq *rule) { default_visit(rule); }
@ -36,5 +38,9 @@ namespace tree_sitter {
// Rebuilds the repeat around the result of applying this function to its content.
void IdentityRuleFn::visit(const Repeat *rule) {
value = std::make_shared<Repeat>(apply(rule->content));
}
void IdentityRuleFn::visit(const Metadata *rule) {
    // Rebuild the metadata node around the transformed inner rule, keeping
    // the original tag.
    auto transformed = apply(rule->rule);
    value = std::make_shared<Metadata>(transformed, rule->value);
}
}
}

View file

@ -13,6 +13,7 @@ namespace tree_sitter {
class Seq;
class String;
class Pattern;
class Metadata;
class Visitor {
public:
@ -20,6 +21,7 @@ namespace tree_sitter {
virtual void visit(const Blank *rule);
virtual void visit(const CharacterSet *rule);
virtual void visit(const Choice *rule);
virtual void visit(const Metadata *rule);
virtual void visit(const Pattern *rule);
virtual void visit(const Repeat *rule);
virtual void visit(const Seq *rule);
@ -41,8 +43,9 @@ namespace tree_sitter {
// RuleFn that yields a rule_ptr. It declares its own default_visit plus
// overrides for Choice, Metadata, Seq and Repeat nodes.
class IdentityRuleFn : public RuleFn<rule_ptr> {
virtual void default_visit(const Rule *rule);
virtual void visit(const Seq *rule);
virtual void visit(const Choice *rule);
virtual void visit(const Metadata *rule);
virtual void visit(const Seq *rule);
virtual void visit(const Repeat *rule);
};
}