Expand regex/string rules as part of grammar preparation

Expanding them at this stage makes it possible to report errors in regex parsing.
This commit is contained in:
Max Brunsfeld 2014-05-19 20:54:59 -07:00
parent 5245bc01fe
commit 649f200831
26 changed files with 883 additions and 651 deletions

View file

@ -19,7 +19,7 @@ namespace tree_sitter_examples {
str(":"),
sym("value") })))) },
{ "array", in_brackets(comma_sep(err(sym("value")))) },
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
{ "string", pattern("\"([^\"]|\\\\\")*\"") },
{ "number", pattern("\\d+(\\.\\d+)?") },
{ "null", keyword("null") },
{ "true", keyword("true") },

View file

@ -34,13 +34,13 @@ SYMBOL_NAMES = {
[ts_builtin_sym_end] = "end",
[ts_sym_number] = "number",
[ts_sym_variable] = "variable",
[ts_aux_sym_token0] = "'+'",
[ts_aux_sym_token1] = "'-'",
[ts_aux_sym_token2] = "'*'",
[ts_aux_sym_token3] = "'/'",
[ts_aux_sym_token4] = "'^'",
[ts_aux_sym_token5] = "'('",
[ts_aux_sym_token6] = "')'",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
[ts_aux_sym_token6] = "",
};
UBIQUITOUS_SYMBOLS = {

View file

@ -109,34 +109,34 @@ SYMBOL_NAMES = {
[ts_aux_sym__func_signature_repeat2] = "_func_signature_repeat2",
[ts_aux_sym__func_signature_repeat3] = "_func_signature_repeat3",
[ts_aux_sym__func_signature_repeat4] = "_func_signature_repeat4",
[ts_aux_sym_token0] = "'package'",
[ts_aux_sym_token1] = "'import'",
[ts_aux_sym_token2] = "'('",
[ts_aux_sym_token3] = "')'",
[ts_aux_sym_token4] = "'type'",
[ts_aux_sym_token5] = "'var'",
[ts_aux_sym_token6] = "'='",
[ts_aux_sym_token7] = "'func'",
[ts_aux_sym_token8] = "'{'",
[ts_aux_sym_token9] = "'}'",
[ts_aux_sym_token10] = "'*'",
[ts_aux_sym_token11] = "'map'",
[ts_aux_sym_token12] = "'['",
[ts_aux_sym_token13] = "']'",
[ts_aux_sym_token14] = "'struct'",
[ts_aux_sym_token15] = "'interface'",
[ts_aux_sym_token16] = "'/'",
[ts_aux_sym_token17] = "'+'",
[ts_aux_sym_token18] = "'-'",
[ts_aux_sym_token19] = "'||'",
[ts_aux_sym_token20] = "'&&'",
[ts_aux_sym_token21] = "'=='",
[ts_aux_sym_token22] = "'<='",
[ts_aux_sym_token23] = "'<'",
[ts_aux_sym_token24] = "'>='",
[ts_aux_sym_token25] = "'>'",
[ts_aux_sym_token26] = "'!'",
[ts_aux_sym_token27] = "','",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
[ts_aux_sym_token6] = "",
[ts_aux_sym_token7] = "",
[ts_aux_sym_token8] = "",
[ts_aux_sym_token9] = "",
[ts_aux_sym_token10] = "",
[ts_aux_sym_token11] = "",
[ts_aux_sym_token12] = "",
[ts_aux_sym_token13] = "",
[ts_aux_sym_token14] = "",
[ts_aux_sym_token15] = "",
[ts_aux_sym_token16] = "",
[ts_aux_sym_token17] = "",
[ts_aux_sym_token18] = "",
[ts_aux_sym_token19] = "",
[ts_aux_sym_token20] = "",
[ts_aux_sym_token21] = "",
[ts_aux_sym_token22] = "",
[ts_aux_sym_token23] = "",
[ts_aux_sym_token24] = "",
[ts_aux_sym_token25] = "",
[ts_aux_sym_token26] = "",
[ts_aux_sym_token27] = "",
};
UBIQUITOUS_SYMBOLS = {

View file

@ -143,52 +143,52 @@ SYMBOL_NAMES = {
[ts_aux_sym_formal_parameters_repeat0] = "formal_parameters_repeat0",
[ts_aux_sym_object_repeat0] = "object_repeat0",
[ts_aux_sym_array_repeat0] = "array_repeat0",
[ts_aux_sym_token0] = "'{'",
[ts_aux_sym_token1] = "'}'",
[ts_aux_sym_token2] = "'for'",
[ts_aux_sym_token3] = "'('",
[ts_aux_sym_token4] = "')'",
[ts_aux_sym_token5] = "'if'",
[ts_aux_sym_token6] = "'else'",
[ts_aux_sym_token7] = "'while'",
[ts_aux_sym_token8] = "'try'",
[ts_aux_sym_token9] = "'catch'",
[ts_aux_sym_token10] = "'switch'",
[ts_aux_sym_token11] = "'case'",
[ts_aux_sym_token12] = "'default'",
[ts_aux_sym_token13] = "':'",
[ts_aux_sym_token14] = "'break'",
[ts_aux_sym_token15] = "'var'",
[ts_aux_sym_token16] = "','",
[ts_aux_sym_token17] = "'return'",
[ts_aux_sym_token18] = "'delete'",
[ts_aux_sym_token19] = "'++'",
[ts_aux_sym_token20] = "'--'",
[ts_aux_sym_token21] = "'+'",
[ts_aux_sym_token22] = "'-'",
[ts_aux_sym_token23] = "'*'",
[ts_aux_sym_token24] = "'/'",
[ts_aux_sym_token25] = "'&'",
[ts_aux_sym_token26] = "'|'",
[ts_aux_sym_token27] = "'^'",
[ts_aux_sym_token28] = "'||'",
[ts_aux_sym_token29] = "'&&'",
[ts_aux_sym_token30] = "'==='",
[ts_aux_sym_token31] = "'=='",
[ts_aux_sym_token32] = "'!=='",
[ts_aux_sym_token33] = "'!='",
[ts_aux_sym_token34] = "'<='",
[ts_aux_sym_token35] = "'<'",
[ts_aux_sym_token36] = "'>='",
[ts_aux_sym_token37] = "'>'",
[ts_aux_sym_token38] = "'!'",
[ts_aux_sym_token39] = "'?'",
[ts_aux_sym_token40] = "'='",
[ts_aux_sym_token41] = "'function'",
[ts_aux_sym_token42] = "'new'",
[ts_aux_sym_token43] = "'.'",
[ts_aux_sym_token44] = "'['",
[ts_aux_sym_token45] = "']'",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
[ts_aux_sym_token6] = "",
[ts_aux_sym_token7] = "",
[ts_aux_sym_token8] = "",
[ts_aux_sym_token9] = "",
[ts_aux_sym_token10] = "",
[ts_aux_sym_token11] = "",
[ts_aux_sym_token12] = "",
[ts_aux_sym_token13] = "",
[ts_aux_sym_token14] = "",
[ts_aux_sym_token15] = "",
[ts_aux_sym_token16] = "",
[ts_aux_sym_token17] = "",
[ts_aux_sym_token18] = "",
[ts_aux_sym_token19] = "",
[ts_aux_sym_token20] = "",
[ts_aux_sym_token21] = "",
[ts_aux_sym_token22] = "",
[ts_aux_sym_token23] = "",
[ts_aux_sym_token24] = "",
[ts_aux_sym_token25] = "",
[ts_aux_sym_token26] = "",
[ts_aux_sym_token27] = "",
[ts_aux_sym_token28] = "",
[ts_aux_sym_token29] = "",
[ts_aux_sym_token30] = "",
[ts_aux_sym_token31] = "",
[ts_aux_sym_token32] = "",
[ts_aux_sym_token33] = "",
[ts_aux_sym_token34] = "",
[ts_aux_sym_token35] = "",
[ts_aux_sym_token36] = "",
[ts_aux_sym_token37] = "",
[ts_aux_sym_token38] = "",
[ts_aux_sym_token39] = "",
[ts_aux_sym_token40] = "",
[ts_aux_sym_token41] = "",
[ts_aux_sym_token42] = "",
[ts_aux_sym_token43] = "",
[ts_aux_sym_token44] = "",
[ts_aux_sym_token45] = "",
};
UBIQUITOUS_SYMBOLS = {

View file

@ -35,12 +35,12 @@ SYMBOL_NAMES = {
[ts_sym_false] = "false",
[ts_aux_sym_object_repeat0] = "object_repeat0",
[ts_aux_sym_array_repeat0] = "array_repeat0",
[ts_aux_sym_token0] = "'{'",
[ts_aux_sym_token1] = "':'",
[ts_aux_sym_token2] = "','",
[ts_aux_sym_token3] = "'}'",
[ts_aux_sym_token4] = "'['",
[ts_aux_sym_token5] = "']'",
[ts_aux_sym_token0] = "",
[ts_aux_sym_token1] = "",
[ts_aux_sym_token2] = "",
[ts_aux_sym_token3] = "",
[ts_aux_sym_token4] = "",
[ts_aux_sym_token5] = "",
};
UBIQUITOUS_SYMBOLS = {
@ -69,90 +69,87 @@ LEX_FN() {
if (lookahead == '\"')
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
LEX_ERROR();
case 2:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(3);
if (lookahead == '\\')
ADVANCE(5);
ADVANCE(4);
LEX_ERROR();
case 3:
ACCEPT_TOKEN(ts_sym_string);
case 4:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '\\')
ADVANCE(5);
if (lookahead == '\\')
ADVANCE(4);
LEX_ERROR();
case 4:
ACCEPT_TOKEN(ts_sym_string);
case 5:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
ADVANCE(2);
if (lookahead == '\"')
ADVANCE(6);
ADVANCE(3);
if (lookahead == '\\')
ADVANCE(5);
LEX_ERROR();
case 6:
if (!((lookahead == '\"') ||
(lookahead == '\\')))
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if (lookahead == '\\')
ADVANCE(5);
ACCEPT_TOKEN(ts_sym_string);
case 7:
case 6:
if (lookahead == '.')
ADVANCE(8);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(6);
ACCEPT_TOKEN(ts_sym_number);
case 7:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(8);
LEX_ERROR();
case 8:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
LEX_ERROR();
case 9:
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(9);
ADVANCE(8);
ACCEPT_TOKEN(ts_sym_number);
case 10:
case 9:
ACCEPT_TOKEN(ts_aux_sym_token4);
case 11:
case 10:
if (lookahead == 'a')
ADVANCE(11);
LEX_ERROR();
case 11:
if (lookahead == 'l')
ADVANCE(12);
LEX_ERROR();
case 12:
if (lookahead == 'l')
if (lookahead == 's')
ADVANCE(13);
LEX_ERROR();
case 13:
if (lookahead == 's')
if (lookahead == 'e')
ADVANCE(14);
LEX_ERROR();
case 14:
if (lookahead == 'e')
ADVANCE(15);
LEX_ERROR();
case 15:
ACCEPT_TOKEN(ts_sym_false);
case 16:
case 15:
if (lookahead == 'u')
ADVANCE(16);
LEX_ERROR();
case 16:
if (lookahead == 'l')
ADVANCE(17);
LEX_ERROR();
case 17:
@ -160,65 +157,71 @@ LEX_FN() {
ADVANCE(18);
LEX_ERROR();
case 18:
if (lookahead == 'l')
ADVANCE(19);
LEX_ERROR();
case 19:
ACCEPT_TOKEN(ts_sym_null);
case 20:
case 19:
if (lookahead == 'r')
ADVANCE(20);
LEX_ERROR();
case 20:
if (lookahead == 'u')
ADVANCE(21);
LEX_ERROR();
case 21:
if (lookahead == 'u')
if (lookahead == 'e')
ADVANCE(22);
LEX_ERROR();
case 22:
if (lookahead == 'e')
ADVANCE(23);
LEX_ERROR();
case 23:
ACCEPT_TOKEN(ts_sym_true);
case 24:
case 23:
ACCEPT_TOKEN(ts_aux_sym_token0);
case 25:
case 24:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(26);
ADVANCE(25);
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(25);
ADVANCE(24);
LEX_ERROR();
case 26:
case 25:
ACCEPT_TOKEN(ts_builtin_sym_end);
case 27:
case 26:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(27);
ADVANCE(26);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case 28:
case 27:
ACCEPT_TOKEN(ts_aux_sym_token3);
case 29:
case 28:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(29);
ADVANCE(28);
if (lookahead == ',')
ADVANCE(29);
if (lookahead == '}')
ADVANCE(27);
LEX_ERROR();
case 29:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 30:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(30);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case 30:
ACCEPT_TOKEN(ts_aux_sym_token2);
case 31:
START_TOKEN();
if ((lookahead == '\t') ||
@ -226,8 +229,8 @@ LEX_FN() {
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(31);
if (lookahead == '}')
ADVANCE(28);
if (lookahead == '\"')
ADVANCE(2);
LEX_ERROR();
case 32:
START_TOKEN();
@ -236,128 +239,118 @@ LEX_FN() {
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(32);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ':')
ADVANCE(33);
LEX_ERROR();
case 33:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(33);
if (lookahead == ':')
ADVANCE(34);
LEX_ERROR();
case 34:
ACCEPT_TOKEN(ts_aux_sym_token1);
case 35:
case 34:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(35);
ADVANCE(34);
if (lookahead == '\"')
ADVANCE(2);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
LEX_ERROR();
case 36:
case 35:
ACCEPT_TOKEN(ts_aux_sym_token5);
case 37:
case 36:
START_TOKEN();
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(36);
if (lookahead == ',')
ADVANCE(29);
if (lookahead == ']')
ADVANCE(35);
LEX_ERROR();
case 37:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(37);
if (lookahead == ',')
ADVANCE(30);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
LEX_ERROR();
case 38:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(38);
if (lookahead == ']')
ADVANCE(36);
LEX_ERROR();
case 39:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(26);
ADVANCE(25);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(39);
ADVANCE(38);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ',')
ADVANCE(30);
ADVANCE(29);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == ':')
ADVANCE(34);
ADVANCE(33);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == '\0')
ADVANCE(26);
ADVANCE(25);
if (('\t' <= lookahead && lookahead <= '\n') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(39);
ADVANCE(38);
if (lookahead == '\"')
ADVANCE(2);
if (lookahead == ',')
ADVANCE(30);
ADVANCE(29);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(7);
ADVANCE(6);
if (lookahead == ':')
ADVANCE(34);
ADVANCE(33);
if (lookahead == '[')
ADVANCE(10);
ADVANCE(9);
if (lookahead == ']')
ADVANCE(36);
ADVANCE(35);
if (lookahead == 'f')
ADVANCE(11);
ADVANCE(10);
if (lookahead == 'n')
ADVANCE(16);
ADVANCE(15);
if (lookahead == 't')
ADVANCE(20);
ADVANCE(19);
if (lookahead == '{')
ADVANCE(24);
ADVANCE(23);
if (lookahead == '}')
ADVANCE(28);
ADVANCE(27);
LEX_ERROR();
default:
LEX_PANIC();
@ -366,65 +359,65 @@ LEX_FN() {
LEX_STATES = {
[0] = 1,
[1] = 25,
[2] = 25,
[3] = 27,
[4] = 29,
[5] = 31,
[6] = 25,
[7] = 32,
[8] = 29,
[9] = 31,
[10] = 33,
[1] = 24,
[2] = 24,
[3] = 26,
[4] = 28,
[5] = 30,
[6] = 24,
[7] = 31,
[8] = 28,
[9] = 30,
[10] = 32,
[11] = 1,
[12] = 29,
[13] = 31,
[14] = 29,
[15] = 27,
[16] = 29,
[17] = 31,
[18] = 29,
[19] = 33,
[12] = 28,
[13] = 30,
[14] = 28,
[15] = 26,
[16] = 28,
[17] = 30,
[18] = 28,
[19] = 32,
[20] = 1,
[21] = 29,
[22] = 31,
[23] = 29,
[24] = 35,
[25] = 37,
[26] = 38,
[27] = 29,
[21] = 28,
[22] = 30,
[23] = 28,
[24] = 34,
[25] = 36,
[26] = 37,
[27] = 28,
[28] = 1,
[29] = 37,
[30] = 38,
[31] = 37,
[32] = 27,
[33] = 29,
[34] = 31,
[35] = 37,
[36] = 33,
[29] = 36,
[30] = 37,
[31] = 36,
[32] = 26,
[33] = 28,
[34] = 30,
[35] = 36,
[36] = 32,
[37] = 1,
[38] = 29,
[39] = 31,
[40] = 37,
[41] = 37,
[42] = 35,
[43] = 37,
[44] = 38,
[45] = 37,
[46] = 37,
[47] = 29,
[48] = 29,
[49] = 33,
[38] = 28,
[39] = 30,
[40] = 36,
[41] = 36,
[42] = 34,
[43] = 36,
[44] = 37,
[45] = 36,
[46] = 36,
[47] = 28,
[48] = 28,
[49] = 32,
[50] = 1,
[51] = 29,
[52] = 31,
[53] = 25,
[54] = 25,
[55] = 35,
[56] = 37,
[57] = 38,
[58] = 25,
[59] = 25,
[51] = 28,
[52] = 30,
[53] = 24,
[54] = 24,
[55] = 34,
[56] = 36,
[57] = 37,
[58] = 24,
[59] = 24,
};
#pragma GCC diagnostic push