Handle * quantifier in regex patterns
This commit is contained in:
parent
57ed6da225
commit
50a90e456b
9 changed files with 260 additions and 136 deletions
|
|
@ -39,7 +39,7 @@ namespace test_grammars {
|
|||
comma_sep(err(sym("value"))),
|
||||
_sym("right_bracket"), }) },
|
||||
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "number", pattern("\\d+(.\\d+)?") },
|
||||
{ "comma", str(",") },
|
||||
{ "colon", str(":") },
|
||||
{ "left_bracket", str("[") },
|
||||
|
|
|
|||
|
|
@ -66,32 +66,41 @@ LEX_FN() {
|
|||
ADVANCE(10);
|
||||
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
|
||||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
|
||||
ADVANCE(11);
|
||||
ADVANCE(12);
|
||||
LEX_ERROR();
|
||||
case 9:
|
||||
ACCEPT_TOKEN(ts_aux_sym_token1);
|
||||
case 10:
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(10);
|
||||
ADVANCE(11);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 11:
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(11);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 12:
|
||||
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
|
||||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
|
||||
ADVANCE(11);
|
||||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 12:
|
||||
case 13:
|
||||
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
|
||||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
|
||||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_sym_variable);
|
||||
case 14:
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(4);
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(7);
|
||||
LEX_ERROR();
|
||||
case 13:
|
||||
case 15:
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(7);
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(2);
|
||||
LEX_ERROR();
|
||||
case 14:
|
||||
case 16:
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(7);
|
||||
LEX_ERROR();
|
||||
|
|
@ -108,7 +117,7 @@ LEX_FN() {
|
|||
ADVANCE(10);
|
||||
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
|
||||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
|
||||
ADVANCE(11);
|
||||
ADVANCE(12);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_PANIC();
|
||||
|
|
@ -134,7 +143,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(2);
|
||||
SET_LEX_STATE(13);
|
||||
SET_LEX_STATE(15);
|
||||
REDUCE(ts_sym_plus, ts_sym_term, 1, COLLAPSE({0}))
|
||||
SHIFT(ts_sym_times, 3)
|
||||
REDUCE(ts_builtin_sym_end, ts_sym_term, 1, COLLAPSE({0}))
|
||||
|
|
@ -254,7 +263,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(19);
|
||||
SET_LEX_STATE(12);
|
||||
SET_LEX_STATE(14);
|
||||
SHIFT(ts_sym_times, 20)
|
||||
REDUCE(ts_aux_sym_token2, ts_sym_term, 1, COLLAPSE({0}))
|
||||
END_STATE();
|
||||
|
|
@ -322,7 +331,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(29);
|
||||
SET_LEX_STATE(12);
|
||||
SET_LEX_STATE(14);
|
||||
REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
REDUCE(ts_aux_sym_token2, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
END_STATE();
|
||||
|
|
@ -349,13 +358,13 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(33);
|
||||
SET_LEX_STATE(12);
|
||||
SET_LEX_STATE(14);
|
||||
REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
REDUCE(ts_aux_sym_token2, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
END_STATE();
|
||||
|
||||
STATE(34);
|
||||
SET_LEX_STATE(13);
|
||||
SET_LEX_STATE(15);
|
||||
REDUCE(ts_sym_plus, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
REDUCE(ts_builtin_sym_end, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
|
|
@ -377,7 +386,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(37);
|
||||
SET_LEX_STATE(14);
|
||||
SET_LEX_STATE(16);
|
||||
SHIFT(ts_sym_times, 38)
|
||||
REDUCE(ts_builtin_sym_end, ts_sym_term, 1, COLLAPSE({0}))
|
||||
END_STATE();
|
||||
|
|
@ -422,7 +431,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(44);
|
||||
SET_LEX_STATE(14);
|
||||
SET_LEX_STATE(16);
|
||||
REDUCE(ts_sym_times, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
REDUCE(ts_builtin_sym_end, ts_sym_factor, 1, COLLAPSE({0}))
|
||||
END_STATE();
|
||||
|
|
@ -449,7 +458,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(48);
|
||||
SET_LEX_STATE(14);
|
||||
SET_LEX_STATE(16);
|
||||
REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
REDUCE(ts_builtin_sym_end, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
END_STATE();
|
||||
|
|
@ -471,7 +480,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(51);
|
||||
SET_LEX_STATE(13);
|
||||
SET_LEX_STATE(15);
|
||||
REDUCE(ts_sym_plus, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
REDUCE(ts_sym_times, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
REDUCE(ts_builtin_sym_end, ts_sym_factor, 3, COLLAPSE({1, 0, 1}))
|
||||
|
|
|
|||
|
|
@ -75,114 +75,143 @@ LEX_FN() {
|
|||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(9);
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(15);
|
||||
ADVANCE(19);
|
||||
if (LOOKAHEAD_CHAR() == '[')
|
||||
ADVANCE(16);
|
||||
ADVANCE(24);
|
||||
if (LOOKAHEAD_CHAR() == 'f')
|
||||
ADVANCE(17);
|
||||
ADVANCE(25);
|
||||
if (LOOKAHEAD_CHAR() == 'n')
|
||||
ADVANCE(22);
|
||||
if (LOOKAHEAD_CHAR() == 't')
|
||||
ADVANCE(26);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(30);
|
||||
if (LOOKAHEAD_CHAR() == 't')
|
||||
ADVANCE(34);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(38);
|
||||
LEX_ERROR();
|
||||
case 9:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(10);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(12);
|
||||
ADVANCE(16);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(14);
|
||||
ADVANCE(18);
|
||||
LEX_ERROR();
|
||||
case 10:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(10);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(14);
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 11:
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 12:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(10);
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(13);
|
||||
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
|
||||
ADVANCE(10);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(12);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(14);
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 12:
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 13:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(10);
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(14);
|
||||
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(12);
|
||||
ADVANCE(13);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(14);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 14:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 15:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 16:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(17);
|
||||
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
|
||||
ADVANCE(10);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
LEX_ERROR();
|
||||
case 17:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(11);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(13);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_string);
|
||||
case 18:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case 15:
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(15);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 16:
|
||||
ACCEPT_TOKEN(ts_sym_left_bracket);
|
||||
case 17:
|
||||
if (LOOKAHEAD_CHAR() == 'a')
|
||||
ADVANCE(18);
|
||||
LEX_ERROR();
|
||||
case 18:
|
||||
if (LOOKAHEAD_CHAR() == 'l')
|
||||
ADVANCE(19);
|
||||
LEX_ERROR();
|
||||
case 19:
|
||||
if (LOOKAHEAD_CHAR() == 's')
|
||||
if (LOOKAHEAD_CHAR() == '.')
|
||||
ADVANCE(20);
|
||||
LEX_ERROR();
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(23);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 20:
|
||||
if (LOOKAHEAD_CHAR() == 'e')
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(21);
|
||||
LEX_ERROR();
|
||||
case 21:
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(22);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 22:
|
||||
if (LOOKAHEAD_CHAR() == 'u')
|
||||
ADVANCE(23);
|
||||
LEX_ERROR();
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(22);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 23:
|
||||
if (LOOKAHEAD_CHAR() == 'l')
|
||||
ADVANCE(24);
|
||||
LEX_ERROR();
|
||||
if (LOOKAHEAD_CHAR() == '.')
|
||||
ADVANCE(20);
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(23);
|
||||
ACCEPT_TOKEN(ts_sym_number);
|
||||
case 24:
|
||||
if (LOOKAHEAD_CHAR() == 'l')
|
||||
ADVANCE(25);
|
||||
LEX_ERROR();
|
||||
ACCEPT_TOKEN(ts_sym_left_bracket);
|
||||
case 25:
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
if (LOOKAHEAD_CHAR() == 'a')
|
||||
ADVANCE(26);
|
||||
LEX_ERROR();
|
||||
case 26:
|
||||
if (LOOKAHEAD_CHAR() == 'r')
|
||||
if (LOOKAHEAD_CHAR() == 'l')
|
||||
ADVANCE(27);
|
||||
LEX_ERROR();
|
||||
case 27:
|
||||
if (LOOKAHEAD_CHAR() == 'u')
|
||||
if (LOOKAHEAD_CHAR() == 's')
|
||||
ADVANCE(28);
|
||||
LEX_ERROR();
|
||||
case 28:
|
||||
|
|
@ -190,78 +219,106 @@ LEX_FN() {
|
|||
ADVANCE(29);
|
||||
LEX_ERROR();
|
||||
case 29:
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
ACCEPT_TOKEN(ts_sym_false);
|
||||
case 30:
|
||||
ACCEPT_TOKEN(ts_sym_left_brace);
|
||||
if (LOOKAHEAD_CHAR() == 'u')
|
||||
ADVANCE(31);
|
||||
LEX_ERROR();
|
||||
case 31:
|
||||
if (LOOKAHEAD_CHAR() == ':')
|
||||
if (LOOKAHEAD_CHAR() == 'l')
|
||||
ADVANCE(32);
|
||||
LEX_ERROR();
|
||||
case 32:
|
||||
ACCEPT_TOKEN(ts_sym_colon);
|
||||
if (LOOKAHEAD_CHAR() == 'l')
|
||||
ADVANCE(33);
|
||||
LEX_ERROR();
|
||||
case 33:
|
||||
ACCEPT_TOKEN(ts_sym_null);
|
||||
case 34:
|
||||
if (LOOKAHEAD_CHAR() == 'r')
|
||||
ADVANCE(35);
|
||||
LEX_ERROR();
|
||||
case 35:
|
||||
if (LOOKAHEAD_CHAR() == 'u')
|
||||
ADVANCE(36);
|
||||
LEX_ERROR();
|
||||
case 36:
|
||||
if (LOOKAHEAD_CHAR() == 'e')
|
||||
ADVANCE(37);
|
||||
LEX_ERROR();
|
||||
case 37:
|
||||
ACCEPT_TOKEN(ts_sym_true);
|
||||
case 38:
|
||||
ACCEPT_TOKEN(ts_sym_left_brace);
|
||||
case 39:
|
||||
if (LOOKAHEAD_CHAR() == ':')
|
||||
ADVANCE(40);
|
||||
LEX_ERROR();
|
||||
case 40:
|
||||
ACCEPT_TOKEN(ts_sym_colon);
|
||||
case 41:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(9);
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR();
|
||||
case 34:
|
||||
case 42:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(9);
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(15);
|
||||
ADVANCE(19);
|
||||
if (LOOKAHEAD_CHAR() == '[')
|
||||
ADVANCE(16);
|
||||
ADVANCE(24);
|
||||
if (LOOKAHEAD_CHAR() == ']')
|
||||
ADVANCE(6);
|
||||
if (LOOKAHEAD_CHAR() == 'f')
|
||||
ADVANCE(17);
|
||||
ADVANCE(25);
|
||||
if (LOOKAHEAD_CHAR() == 'n')
|
||||
ADVANCE(22);
|
||||
if (LOOKAHEAD_CHAR() == 't')
|
||||
ADVANCE(26);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(30);
|
||||
if (LOOKAHEAD_CHAR() == 't')
|
||||
ADVANCE(34);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(38);
|
||||
LEX_ERROR();
|
||||
case 35:
|
||||
case 43:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(9);
|
||||
LEX_ERROR();
|
||||
case 36:
|
||||
case 44:
|
||||
ACCEPT_TOKEN(ts_sym_comma);
|
||||
case 37:
|
||||
case 45:
|
||||
ACCEPT_TOKEN(ts_sym_colon);
|
||||
case 38:
|
||||
case 46:
|
||||
ACCEPT_TOKEN(ts_sym_left_bracket);
|
||||
case 39:
|
||||
case 47:
|
||||
ACCEPT_TOKEN(ts_sym_right_bracket);
|
||||
case 40:
|
||||
case 48:
|
||||
ACCEPT_TOKEN(ts_sym_left_brace);
|
||||
case 41:
|
||||
case 49:
|
||||
ACCEPT_TOKEN(ts_sym_right_brace);
|
||||
case ts_lex_state_error:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(9);
|
||||
if (LOOKAHEAD_CHAR() == ',')
|
||||
ADVANCE(36);
|
||||
ADVANCE(44);
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(15);
|
||||
ADVANCE(19);
|
||||
if (LOOKAHEAD_CHAR() == ':')
|
||||
ADVANCE(37);
|
||||
ADVANCE(45);
|
||||
if (LOOKAHEAD_CHAR() == '[')
|
||||
ADVANCE(38);
|
||||
ADVANCE(46);
|
||||
if (LOOKAHEAD_CHAR() == ']')
|
||||
ADVANCE(39);
|
||||
ADVANCE(47);
|
||||
if (LOOKAHEAD_CHAR() == 'f')
|
||||
ADVANCE(17);
|
||||
ADVANCE(25);
|
||||
if (LOOKAHEAD_CHAR() == 'n')
|
||||
ADVANCE(22);
|
||||
ADVANCE(30);
|
||||
if (LOOKAHEAD_CHAR() == 't')
|
||||
ADVANCE(26);
|
||||
ADVANCE(34);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(40);
|
||||
ADVANCE(48);
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(41);
|
||||
ADVANCE(49);
|
||||
LEX_ERROR();
|
||||
default:
|
||||
LEX_PANIC();
|
||||
|
|
@ -296,14 +353,14 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(3);
|
||||
SET_LEX_STATE(33);
|
||||
SET_LEX_STATE(41);
|
||||
SHIFT(ts_sym_string, 4)
|
||||
SHIFT(ts_sym_right_brace, 51)
|
||||
SHIFT(ts_builtin_sym_error, 52)
|
||||
END_STATE();
|
||||
|
||||
STATE(4);
|
||||
SET_LEX_STATE(31);
|
||||
SET_LEX_STATE(39);
|
||||
SHIFT(ts_sym_colon, 5)
|
||||
END_STATE();
|
||||
|
||||
|
|
@ -335,13 +392,13 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(8);
|
||||
SET_LEX_STATE(35);
|
||||
SET_LEX_STATE(43);
|
||||
SHIFT(ts_sym_string, 9)
|
||||
SHIFT(ts_builtin_sym_error, 47)
|
||||
END_STATE();
|
||||
|
||||
STATE(9);
|
||||
SET_LEX_STATE(31);
|
||||
SET_LEX_STATE(39);
|
||||
SHIFT(ts_sym_colon, 10)
|
||||
END_STATE();
|
||||
|
||||
|
|
@ -372,14 +429,14 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(13);
|
||||
SET_LEX_STATE(33);
|
||||
SET_LEX_STATE(41);
|
||||
SHIFT(ts_sym_string, 14)
|
||||
SHIFT(ts_sym_right_brace, 43)
|
||||
SHIFT(ts_builtin_sym_error, 44)
|
||||
END_STATE();
|
||||
|
||||
STATE(14);
|
||||
SET_LEX_STATE(31);
|
||||
SET_LEX_STATE(39);
|
||||
SHIFT(ts_sym_colon, 15)
|
||||
END_STATE();
|
||||
|
||||
|
|
@ -416,7 +473,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(19);
|
||||
SET_LEX_STATE(34);
|
||||
SET_LEX_STATE(42);
|
||||
SHIFT(ts_sym_array, 20)
|
||||
SHIFT(ts_sym_false, 20)
|
||||
SHIFT(ts_sym_null, 20)
|
||||
|
|
@ -472,14 +529,14 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(25);
|
||||
SET_LEX_STATE(33);
|
||||
SET_LEX_STATE(41);
|
||||
SHIFT(ts_sym_string, 26)
|
||||
SHIFT(ts_sym_right_brace, 31)
|
||||
SHIFT(ts_builtin_sym_error, 32)
|
||||
END_STATE();
|
||||
|
||||
STATE(26);
|
||||
SET_LEX_STATE(31);
|
||||
SET_LEX_STATE(39);
|
||||
SHIFT(ts_sym_colon, 27)
|
||||
END_STATE();
|
||||
|
||||
|
|
@ -540,7 +597,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(35);
|
||||
SET_LEX_STATE(34);
|
||||
SET_LEX_STATE(42);
|
||||
SHIFT(ts_sym_array, 20)
|
||||
SHIFT(ts_sym_false, 20)
|
||||
SHIFT(ts_sym_null, 20)
|
||||
|
|
@ -665,7 +722,7 @@ PARSE_TABLE() {
|
|||
END_STATE();
|
||||
|
||||
STATE(55);
|
||||
SET_LEX_STATE(34);
|
||||
SET_LEX_STATE(42);
|
||||
SHIFT(ts_sym_array, 20)
|
||||
SHIFT(ts_sym_false, 20)
|
||||
SHIFT(ts_sym_null, 20)
|
||||
|
|
|
|||
|
|
@ -18,6 +18,11 @@ describe("checking if rules can be blank", [&]() {
|
|||
|
||||
AssertThat(rule_can_be_blank(rule), Equals(false));
|
||||
});
|
||||
|
||||
it("returns true for repeats", [&]() {
|
||||
rule_ptr rule = repeat(str("x"));
|
||||
AssertThat(rule_can_be_blank(rule), Equals(true));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -130,6 +130,10 @@ describe("rule transitions", []() {
|
|||
})));
|
||||
});
|
||||
|
||||
it("handles blanks", [&]() {
|
||||
AssertThat(char_transitions(blank()), Equals(rule_map<CharacterSet>({})));
|
||||
});
|
||||
|
||||
it("handles repeats", [&]() {
|
||||
rule_ptr rule = repeat(str("ab"));
|
||||
AssertThat(
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ describe("parsing pattern rules", []() {
|
|||
});
|
||||
|
||||
it("parses character groups in sequences", []() {
|
||||
Pattern rule("\"([^\"]|\\\\\")+\"");
|
||||
Pattern rule("\"([^\"]|\\\\\")*\"");
|
||||
AssertThat(
|
||||
rule.to_rule_tree(),
|
||||
EqualsPointer(seq({
|
||||
|
|
@ -122,17 +122,40 @@ describe("parsing pattern rules", []() {
|
|||
rule.to_rule_tree(),
|
||||
EqualsPointer(
|
||||
seq({
|
||||
repeat(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' })
|
||||
})),
|
||||
repeat(seq({
|
||||
character({ 'c' }),
|
||||
character({ 'd' })
|
||||
})),
|
||||
seq({
|
||||
seq({ character({ 'a' }), character({ 'b' }) }),
|
||||
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
|
||||
}),
|
||||
seq({
|
||||
seq({ character({ 'c' }), character({ 'd' }) }),
|
||||
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
|
||||
}),
|
||||
})
|
||||
));
|
||||
|
||||
Pattern rule2("(ab)*(cd)*");
|
||||
AssertThat(
|
||||
rule2.to_rule_tree(),
|
||||
EqualsPointer(
|
||||
seq({
|
||||
repeat(seq({ character({ 'a' }), character({ 'b' }) })),
|
||||
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
|
||||
})
|
||||
));
|
||||
});
|
||||
|
||||
it("parses optional rules", []() {
|
||||
Pattern rule("a(bc)?");
|
||||
AssertThat(
|
||||
rule.to_rule_tree(),
|
||||
EqualsPointer(seq({
|
||||
character({ 'a' }),
|
||||
choice({
|
||||
seq({ character({ 'b' }), character({ 'c' }) }),
|
||||
blank()
|
||||
})
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -1,3 +1,10 @@
|
|||
=============================
|
||||
parses floating point numbers
|
||||
=============================
|
||||
3.14
|
||||
---
|
||||
(value (number))
|
||||
|
||||
===================
|
||||
parses empty arrays
|
||||
===================
|
||||
|
|
|
|||
|
|
@ -15,31 +15,37 @@ namespace tree_sitter {
|
|||
using std::map;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::Repeat;
|
||||
using rules::Rule;
|
||||
using rules::Seq;
|
||||
using rules::Symbol;
|
||||
|
||||
namespace prepare_grammar {
|
||||
class ExpandRepeats : public rules::RuleFn<rule_ptr> {
|
||||
rule_ptr make_repeat_helper(string name, const rule_ptr &rule) {
|
||||
return rules::Choice::Build({
|
||||
rules::Seq::Build({ rule, make_shared<rules::Symbol>(name, rules::SymbolTypeAuxiliary) }),
|
||||
make_shared<rules::Blank>() });
|
||||
return Choice::Build({
|
||||
Seq::Build({ rule, make_shared<Symbol>(name, rules::SymbolTypeAuxiliary) }),
|
||||
make_shared<Blank>() });
|
||||
}
|
||||
|
||||
void visit(const rules::Repeat *rule) {
|
||||
void visit(const Repeat *rule) {
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
|
||||
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
|
||||
value = make_shared<rules::Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
|
||||
value = make_shared<Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
|
||||
}
|
||||
|
||||
void visit(const rules::Seq *rule) {
|
||||
value = rules::Seq::Build({ apply(rule->left), apply(rule->right) });
|
||||
void visit(const Seq *rule) {
|
||||
value = Seq::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
void visit(const rules::Choice *rule) {
|
||||
value = rules::Choice::Build({ apply(rule->left), apply(rule->right) });
|
||||
void visit(const Choice *rule) {
|
||||
value = Choice::Build({ apply(rule->left), apply(rule->right) });
|
||||
}
|
||||
|
||||
void default_visit(const rules::Rule *rule) {
|
||||
void default_visit(const Rule *rule) {
|
||||
value = rule->copy();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
@ -40,9 +41,21 @@ namespace tree_sitter {
|
|||
|
||||
rule_ptr factor() {
|
||||
rule_ptr result = atom();
|
||||
if (has_more_input() && (peek() == '+')) {
|
||||
next();
|
||||
result = make_shared<Repeat>(result);
|
||||
if (has_more_input()) {
|
||||
switch (peek()) {
|
||||
case '*':
|
||||
next();
|
||||
result = make_shared<Repeat>(result);
|
||||
break;
|
||||
case '+':
|
||||
next();
|
||||
result = make_shared<Seq>(result, make_shared<Repeat>(result));
|
||||
break;
|
||||
case '?':
|
||||
next();
|
||||
result = make_shared<Choice>(result, make_shared<Blank>());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue