diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index e644fd75..59e08361 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -59,7 +59,8 @@ LEX_FN() { switch (lex_state) { case 1: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(1); @@ -88,7 +89,8 @@ LEX_FN() { START_TOKEN(); if (lookahead == 0) ADVANCE(6); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(5); @@ -117,7 +119,8 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_5); case 12: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(12); @@ -150,7 +153,8 @@ LEX_FN() { START_TOKEN(); if (lookahead == 0) ADVANCE(6); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(15); @@ -178,7 +182,8 @@ LEX_FN() { START_TOKEN(); if (lookahead == 0) ADVANCE(6); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(15); diff --git a/examples/parsers/golang.c b/examples/parsers/golang.c index 2b171aaf..c11add1e 100644 --- a/examples/parsers/golang.c +++ b/examples/parsers/golang.c @@ -442,33 +442,33 @@ LEX_FN() { ADVANCE(3); LEX_ERROR(); case 36: - if (!((lookahead == '\"') || - (lookahead == '\\'))) - ADVANCE(36); if (lookahead == '\"') ADVANCE(37); if (lookahead == '\\') ADVANCE(38); + if (!((lookahead == '\"') || + (lookahead == '\\'))) + ADVANCE(36); LEX_ERROR(); case 37: ACCEPT_TOKEN(ts_sym_string); case 38: - if (!((lookahead == '\"') || - (lookahead == '\\'))) - ADVANCE(36); if (lookahead == '\"') ADVANCE(39); if (lookahead == '\\') ADVANCE(38); - LEX_ERROR(); - case 39: if (!((lookahead == '\"') || (lookahead == '\\'))) ADVANCE(36); + LEX_ERROR(); + case 39: if (lookahead == '\"') ADVANCE(37); if (lookahead == '\\') ADVANCE(38); + if (!((lookahead == '\"') || + (lookahead == '\\'))) + ADVANCE(36); ACCEPT_TOKEN(ts_sym_string); case 40: ACCEPT_TOKEN(ts_aux_sym_1); @@ -644,7 +644,8 @@ LEX_FN() { if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(33); if (lookahead == 'c') @@ -727,7 +728,8 @@ LEX_FN() { if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(33); if (lookahead == 'c') @@ -940,7 +942,8 @@ LEX_FN() { ADVANCE(88); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'q') || ('s' <= lookahead && lookahead <= 'u') || ('w' <= lookahead && lookahead <= 'z')) @@ -1310,7 +1313,8 @@ LEX_FN() { ADVANCE(88); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'd') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'q') || ('s' <= lookahead && lookahead <= 'u') || ('w' <= lookahead && lookahead <= 'z')) @@ -1542,7 +1546,8 @@ LEX_FN() { ADVANCE(115); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'q') || ('s' <= lookahead && lookahead <= 'u') || ('w' <= lookahead && lookahead <= 'z')) @@ -1617,7 +1622,8 @@ LEX_FN() { ADVANCE(82); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'q') || ('s' <= lookahead && lookahead <= 'u') || ('w' <= lookahead && lookahead <= 'z')) @@ -1703,7 +1709,8 @@ LEX_FN() { ADVANCE(145); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'q') || ('s' <= lookahead && lookahead <= 'u') || ('w' <= lookahead && lookahead <= 'z')) @@ -1850,9 +1857,11 @@ LEX_FN() { ADVANCE(115); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'd') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'l') || - ('n' <= lookahead && lookahead <= 'o') || + (lookahead == 'n') || + (lookahead == 'o') || (lookahead == 'q') || (lookahead == 'u') || ('w' <= lookahead && lookahead <= 'z')) @@ -1917,7 +1926,8 @@ LEX_FN() { if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(33); if (lookahead == 'c') @@ -2005,7 +2015,8 @@ LEX_FN() { if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(33); if (lookahead == 'c') @@ -2189,9 +2200,11 @@ LEX_FN() { ADVANCE(115); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'd') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'l') || - ('n' <= lookahead && lookahead <= 'o') || + (lookahead == 'n') || + (lookahead == 'o') || (lookahead == 'q') || (lookahead == 'u') || ('w' <= lookahead && lookahead <= 'z')) diff --git a/examples/parsers/javascript.c b/examples/parsers/javascript.c index 06fe2a49..2744164d 100644 --- a/examples/parsers/javascript.c +++ b/examples/parsers/javascript.c @@ -318,7 +318,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -368,33 +369,33 @@ LEX_FN() { case 4: ACCEPT_TOKEN(ts_aux_sym_28); case 5: - if (!((lookahead == '\"') || - (lookahead == '\\'))) - ADVANCE(5); if (lookahead == '\"') ADVANCE(6); if (lookahead == '\\') ADVANCE(7); + if (!((lookahead == '\"') || + (lookahead == '\\'))) + ADVANCE(5); LEX_ERROR(); case 6: ACCEPT_TOKEN(ts_sym_string); case 7: - if (!((lookahead == '\"') || - (lookahead == '\\'))) - ADVANCE(5); if (lookahead == '\"') ADVANCE(8); if (lookahead == '\\') ADVANCE(7); - LEX_ERROR(); - case 8: if (!((lookahead == '\"') || (lookahead == '\\'))) ADVANCE(5); + LEX_ERROR(); + case 8: if (lookahead == '\"') ADVANCE(6); if (lookahead == '\\') ADVANCE(7); + if (!((lookahead == '\"') || + (lookahead == '\\'))) + ADVANCE(5); ACCEPT_TOKEN(ts_sym_string); case 9: if ((lookahead == '$') || @@ -405,31 +406,31 @@ LEX_FN() { ADVANCE(9); ACCEPT_TOKEN(ts_sym_identifier); case 10: - if (!((lookahead == '\'') || - (lookahead == '\\'))) - ADVANCE(10); if (lookahead == '\'') ADVANCE(6); if (lookahead == '\\') ADVANCE(11); - LEX_ERROR(); - case 11: if (!((lookahead == '\'') || (lookahead == '\\'))) ADVANCE(10); + LEX_ERROR(); + case 11: if (lookahead == '\'') ADVANCE(12); if (lookahead == '\\') ADVANCE(11); - LEX_ERROR(); - case 12: if (!((lookahead == '\'') || (lookahead == '\\'))) ADVANCE(10); + LEX_ERROR(); + case 12: if (lookahead == '\'') ADVANCE(6); if (lookahead == '\\') ADVANCE(11); + if (!((lookahead == '\'') || + (lookahead == '\\'))) + ADVANCE(10); ACCEPT_TOKEN(ts_sym_string); case 13: ACCEPT_TOKEN(ts_aux_sym_3); @@ -446,170 +447,170 @@ LEX_FN() { case 17: ACCEPT_TOKEN(ts_aux_sym_10); case 18: + if (lookahead == '*') + ADVANCE(19); + if (lookahead == '/') + ADVANCE(31); + if (lookahead == '\\') + ADVANCE(34); + if (!((lookahead == '*') || + (lookahead == '/') || + (lookahead == '\\'))) + ADVANCE(38); + LEX_ERROR(); + case 19: + if (lookahead == '*') + ADVANCE(20); + if (lookahead == '/') + ADVANCE(25); + if (lookahead == '\\') + ADVANCE(23); if (!((lookahead == '*') || (lookahead == '/') || (lookahead == '\\'))) ADVANCE(19); - if (lookahead == '*') - ADVANCE(25); - if (lookahead == '/') - ADVANCE(36); - if (lookahead == '\\') - ADVANCE(22); - LEX_ERROR(); - case 19: - if (!((lookahead == '/') || - (lookahead == '\\'))) - ADVANCE(19); - if (lookahead == '/') - ADVANCE(20); - if (lookahead == '\\') - ADVANCE(22); LEX_ERROR(); case 20: - if (lookahead == 'g') + if (lookahead == '/') ADVANCE(21); - ACCEPT_TOKEN(ts_sym_regex); - case 21: - ACCEPT_TOKEN(ts_sym_regex); - case 22: + if (lookahead == '\\') + ADVANCE(23); if (!((lookahead == '/') || (lookahead == '\\'))) ADVANCE(19); - if (lookahead == '/') - ADVANCE(23); - if (lookahead == '\\') - ADVANCE(22); LEX_ERROR(); + case 21: + if (lookahead == 'g') + ADVANCE(22); + ACCEPT_TOKEN(ts_sym_comment); + case 22: + ACCEPT_TOKEN(ts_sym_regex); case 23: - if (!((lookahead == '/') || + if (lookahead == '*') + ADVANCE(20); + if (lookahead == '/') + ADVANCE(24); + if (lookahead == '\\') + ADVANCE(23); + if (!((lookahead == '*') || + (lookahead == '/') || + (lookahead == '\\'))) + ADVANCE(19); + LEX_ERROR(); + case 24: + if (lookahead == '*') + ADVANCE(20); + if (lookahead == '/') + ADVANCE(25); + if (lookahead == '\\') + ADVANCE(23); + if (lookahead == 'g') + ADVANCE(30); + if (!((lookahead == '*') || + (lookahead == '/') || (lookahead == '\\') || (lookahead == 'g'))) ADVANCE(19); - if (lookahead == '/') - ADVANCE(20); - if (lookahead == '\\') - ADVANCE(22); - if (lookahead == 'g') - ADVANCE(24); - ACCEPT_TOKEN(ts_sym_regex); - case 24: - if (!((lookahead == '/') || - (lookahead == '\\'))) - ADVANCE(19); - if (lookahead == '/') - ADVANCE(20); - if (lookahead == '\\') - ADVANCE(22); ACCEPT_TOKEN(ts_sym_regex); case 25: - if (!((lookahead == '*') || - (lookahead == '/') || - (lookahead == '\\'))) - ADVANCE(25); if (lookahead == '*') ADVANCE(26); - if (lookahead == '/') - ADVANCE(30); - if (lookahead == '\\') + if (lookahead == 'g') + ADVANCE(29); + if (!((lookahead == '*') || + (lookahead == 'g'))) ADVANCE(28); - LEX_ERROR(); + ACCEPT_TOKEN(ts_sym_regex); case 26: - if (!((lookahead == '/') || - (lookahead == '\\'))) - ADVANCE(25); if (lookahead == '/') ADVANCE(27); - if (lookahead == '\\') + if (!(lookahead == '/')) ADVANCE(28); LEX_ERROR(); case 27: - if (lookahead == 'g') - ADVANCE(21); ACCEPT_TOKEN(ts_sym_comment); case 28: - if (!((lookahead == '*') || - (lookahead == '/') || - (lookahead == '\\'))) - ADVANCE(25); if (lookahead == '*') ADVANCE(26); - if (lookahead == '/') - ADVANCE(29); - if (lookahead == '\\') + if (!(lookahead == '*')) ADVANCE(28); LEX_ERROR(); case 29: - if (!((lookahead == '*') || - (lookahead == '/') || - (lookahead == '\\') || - (lookahead == 'g'))) - ADVANCE(25); if (lookahead == '*') ADVANCE(26); - if (lookahead == '/') - ADVANCE(30); - if (lookahead == '\\') + if (!(lookahead == '*')) ADVANCE(28); - if (lookahead == 'g') - ADVANCE(35); ACCEPT_TOKEN(ts_sym_regex); case 30: - if (!((lookahead == '*') || - (lookahead == 'g'))) - ADVANCE(31); if (lookahead == '*') - ADVANCE(32); - if (lookahead == 'g') - ADVANCE(34); - ACCEPT_TOKEN(ts_sym_regex); - case 31: - if (!(lookahead == '*')) - ADVANCE(31); - if (lookahead == '*') - ADVANCE(32); - LEX_ERROR(); - case 32: - if (!(lookahead == '/')) - ADVANCE(31); + ADVANCE(20); if (lookahead == '/') - ADVANCE(33); - LEX_ERROR(); - case 33: - ACCEPT_TOKEN(ts_sym_comment); - case 34: - if (!(lookahead == '*')) - ADVANCE(31); - if (lookahead == '*') - ADVANCE(32); - ACCEPT_TOKEN(ts_sym_regex); - case 35: + ADVANCE(25); + if (lookahead == '\\') + ADVANCE(23); if (!((lookahead == '*') || (lookahead == '/') || (lookahead == '\\'))) - ADVANCE(25); - if (lookahead == '*') - ADVANCE(26); - if (lookahead == '/') - ADVANCE(30); - if (lookahead == '\\') - ADVANCE(28); + ADVANCE(19); ACCEPT_TOKEN(ts_sym_regex); - case 36: + case 31: + if (lookahead == 'g') + ADVANCE(32); if (!((lookahead == '\n') || (lookahead == 'g'))) - ADVANCE(37); - if (lookahead == 'g') + ADVANCE(33); + ACCEPT_TOKEN(ts_sym_comment); + case 32: + if (!(lookahead == '\n')) + ADVANCE(33); + ACCEPT_TOKEN(ts_sym_comment); + case 33: + if (!(lookahead == '\n')) + ADVANCE(33); + ACCEPT_TOKEN(ts_sym_comment); + case 34: + if (lookahead == '/') + ADVANCE(35); + if (lookahead == '\\') + ADVANCE(34); + if (!((lookahead == '/') || + (lookahead == '\\'))) ADVANCE(38); - ACCEPT_TOKEN(ts_sym_comment); + LEX_ERROR(); + case 35: + if (lookahead == '/') + ADVANCE(36); + if (lookahead == '\\') + ADVANCE(34); + if (lookahead == 'g') + ADVANCE(37); + if (!((lookahead == '/') || + (lookahead == '\\') || + (lookahead == 'g'))) + ADVANCE(38); + ACCEPT_TOKEN(ts_sym_regex); + case 36: + if (lookahead == 'g') + ADVANCE(22); + ACCEPT_TOKEN(ts_sym_regex); case 37: - if (!(lookahead == '\n')) - ADVANCE(37); - ACCEPT_TOKEN(ts_sym_comment); + if (lookahead == '/') + ADVANCE(36); + if (lookahead == '\\') + ADVANCE(34); + if (!((lookahead == '/') || + (lookahead == '\\'))) + ADVANCE(38); + ACCEPT_TOKEN(ts_sym_regex); case 38: - if (!(lookahead == '\n')) - ADVANCE(37); - ACCEPT_TOKEN(ts_sym_comment); + if (lookahead == '/') + ADVANCE(36); + if (lookahead == '\\') + ADVANCE(34); + if (!((lookahead == '/') || + (lookahead == '\\'))) + ADVANCE(38); + LEX_ERROR(); case 39: if (lookahead == '.') ADVANCE(40); @@ -832,7 +833,8 @@ LEX_FN() { ('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(9); if (lookahead == 'c') @@ -1073,7 +1075,8 @@ LEX_FN() { ('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(9); if (lookahead == 'c') @@ -1347,9 +1350,9 @@ LEX_FN() { LEX_ERROR(); case 112: if (lookahead == '*') - ADVANCE(31); + ADVANCE(28); if (lookahead == '/') - ADVANCE(37); + ADVANCE(33); LEX_ERROR(); case 113: START_TOKEN(); @@ -1416,9 +1419,9 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_34); case 121: if (lookahead == '*') - ADVANCE(31); + ADVANCE(28); if (lookahead == '/') - ADVANCE(37); + ADVANCE(33); ACCEPT_TOKEN(ts_aux_sym_14); case 122: ACCEPT_TOKEN(ts_aux_sym_5); @@ -1559,9 +1562,9 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_31); case 151: if (lookahead == '*') - ADVANCE(31); + ADVANCE(28); if (lookahead == '/') - ADVANCE(37); + ADVANCE(33); if (lookahead == '=') ADVANCE(152); ACCEPT_TOKEN(ts_aux_sym_14); @@ -1602,7 +1605,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -1779,7 +1783,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -2588,7 +2593,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -2658,28 +2664,28 @@ LEX_FN() { ADVANCE(115); ACCEPT_TOKEN(ts_aux_sym_28); case 194: + if (lookahead == '*') + ADVANCE(19); + if (lookahead == '/') + ADVANCE(31); + if (lookahead == '=') + ADVANCE(195); + if (lookahead == '\\') + ADVANCE(34); if (!((lookahead == '*') || (lookahead == '/') || (lookahead == '=') || (lookahead == '\\'))) - ADVANCE(19); - if (lookahead == '*') - ADVANCE(25); - if (lookahead == '/') - ADVANCE(36); - if (lookahead == '=') - ADVANCE(195); - if (lookahead == '\\') - ADVANCE(22); + ADVANCE(38); ACCEPT_TOKEN(ts_aux_sym_14); case 195: + if (lookahead == '/') + ADVANCE(36); + if (lookahead == '\\') + ADVANCE(34); if (!((lookahead == '/') || (lookahead == '\\'))) - ADVANCE(19); - if (lookahead == '/') - ADVANCE(20); - if (lookahead == '\\') - ADVANCE(22); + ADVANCE(38); ACCEPT_TOKEN(ts_aux_sym_33); case 196: if ((lookahead == '$') || @@ -2743,7 +2749,8 @@ LEX_FN() { ('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(9); if (lookahead == 'c') @@ -2808,7 +2815,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -2991,7 +2999,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3059,16 +3068,16 @@ LEX_FN() { ADVANCE(156); LEX_ERROR(); case 212: + if (lookahead == '*') + ADVANCE(19); + if (lookahead == '/') + ADVANCE(31); + if (lookahead == '\\') + ADVANCE(34); if (!((lookahead == '*') || (lookahead == '/') || (lookahead == '\\'))) - ADVANCE(19); - if (lookahead == '*') - ADVANCE(25); - if (lookahead == '/') - ADVANCE(36); - if (lookahead == '\\') - ADVANCE(22); + ADVANCE(38); ACCEPT_TOKEN(ts_aux_sym_14); case 213: START_TOKEN(); @@ -3194,7 +3203,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3333,7 +3343,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3513,7 +3524,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3597,7 +3609,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3663,7 +3676,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3747,7 +3761,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -3822,7 +3837,8 @@ LEX_FN() { ('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || - ('a' <= lookahead && lookahead <= 'b') || + (lookahead == 'a') || + (lookahead == 'b') || ('d' <= lookahead && lookahead <= 'z')) ADVANCE(9); if (lookahead == 'c') @@ -3945,7 +3961,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4029,7 +4046,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4095,7 +4113,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4178,7 +4197,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4284,7 +4304,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4369,7 +4390,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4436,7 +4458,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -4731,7 +4754,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || ('a' <= lookahead && lookahead <= 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 's') || ('u' <= lookahead && lookahead <= 'z')) @@ -4810,7 +4834,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || ('a' <= lookahead && lookahead <= 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 's') || ('u' <= lookahead && lookahead <= 'z')) @@ -4984,7 +5009,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5048,7 +5074,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5132,7 +5159,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5198,7 +5226,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5281,7 +5310,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5346,7 +5376,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5431,7 +5462,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5498,7 +5530,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5583,7 +5616,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5648,7 +5682,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5733,7 +5768,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5800,7 +5836,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5886,7 +5923,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -5971,7 +6009,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6055,7 +6094,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6121,7 +6161,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6204,7 +6245,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6269,7 +6311,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6354,7 +6397,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6421,7 +6465,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6508,7 +6553,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6571,7 +6617,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6654,7 +6701,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6719,7 +6767,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6801,7 +6850,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6865,7 +6915,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -6949,7 +7000,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7015,7 +7067,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7099,7 +7152,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7163,7 +7217,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7247,7 +7302,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7313,7 +7369,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7400,7 +7457,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7483,7 +7541,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7548,7 +7607,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7630,7 +7690,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7694,7 +7755,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7778,7 +7840,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7844,7 +7907,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7927,7 +7991,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -7992,7 +8057,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8077,7 +8143,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8144,7 +8211,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8231,7 +8299,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8294,7 +8363,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8377,7 +8447,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8442,7 +8513,8 @@ LEX_FN() { (lookahead == 'a') || (lookahead == 'c') || (lookahead == 'e') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8524,7 +8596,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8588,7 +8661,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8672,7 +8746,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8738,7 +8813,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8822,7 +8898,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8886,7 +8963,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -8970,7 +9048,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -9036,7 +9115,8 @@ LEX_FN() { (lookahead == '_') || (lookahead == 'a') || (lookahead == 'c') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -9119,7 +9199,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || @@ -9214,7 +9295,8 @@ LEX_FN() { ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || (lookahead == 'a') || - ('g' <= lookahead && lookahead <= 'h') || + (lookahead == 'g') || + (lookahead == 'h') || ('j' <= lookahead && lookahead <= 'm') || ('o' <= lookahead && lookahead <= 'q') || (lookahead == 'u') || diff --git a/examples/parsers/json.c b/examples/parsers/json.c index a0113878..0224d72b 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -60,7 +60,8 @@ LEX_FN() { switch (lex_state) { case 1: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(1); @@ -80,33 +81,33 @@ LEX_FN() { ADVANCE(23); LEX_ERROR(); case 2: - if (!((lookahead == '\"') || - (lookahead == '\\'))) - ADVANCE(2); if (lookahead == '\"') ADVANCE(3); if (lookahead == '\\') ADVANCE(4); + if (!((lookahead == '\"') || + (lookahead == '\\'))) + ADVANCE(2); LEX_ERROR(); case 3: ACCEPT_TOKEN(ts_sym_string); case 4: - if (!((lookahead == '\"') || - (lookahead == '\\'))) - ADVANCE(2); if (lookahead == '\"') ADVANCE(5); if (lookahead == '\\') ADVANCE(4); - LEX_ERROR(); - case 5: if (!((lookahead == '\"') || (lookahead == '\\'))) ADVANCE(2); + LEX_ERROR(); + case 5: if (lookahead == '\"') ADVANCE(3); if (lookahead == '\\') ADVANCE(4); + if (!((lookahead == '\"') || + (lookahead == '\\'))) + ADVANCE(2); ACCEPT_TOKEN(ts_sym_string); case 6: if (lookahead == '.') @@ -186,7 +187,8 @@ LEX_FN() { ACCEPT_TOKEN(ts_builtin_sym_end); case 26: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(26); @@ -199,7 +201,8 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_4); case 28: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(28); @@ -224,7 +227,8 @@ LEX_FN() { ACCEPT_TOKEN(ts_aux_sym_6); case 30: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(30); @@ -247,7 +251,8 @@ LEX_FN() { LEX_ERROR(); case 33: START_TOKEN(); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(33); @@ -292,7 +297,8 @@ LEX_FN() { START_TOKEN(); if (lookahead == 0) ADVANCE(25); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(38); @@ -320,9 +326,11 @@ LEX_FN() { ADVANCE(27); LEX_ERROR(); case ts_lex_state_error: + START_TOKEN(); if (lookahead == 0) ADVANCE(25); - if (('\t' <= lookahead && lookahead <= '\n') || + if ((lookahead == '\t') || + (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) ADVANCE(38); diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc index 17451423..84f31570 100644 --- a/spec/compiler/build_tables/item_set_transitions_spec.cc +++ b/spec/compiler/build_tables/item_set_transitions_spec.cc @@ -1,6 +1,7 @@ #include "compiler/compiler_spec_helper.h" #include "compiler/build_tables/item_set_transitions.h" #include "compiler/prepared_grammar.h" +#include "compiler/helpers/rule_helpers.h" using namespace rules; using namespace build_tables; @@ -11,16 +12,16 @@ describe("lexical item set transitions", []() { describe("when two items in the set have transitions on the same character", [&]() { it("merges the transitions by computing the union of the two item sets", [&]() { LexItemSet set1({ - LexItem(Symbol(1), character({ {'a', 'f'} })), - LexItem(Symbol(2), character({ {'e', 'x'} })) }); + LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()), + LexItem(Symbol(2), CharacterSet().include('e', 'x').copy()) }); AssertThat(char_transitions(set1), Equals(map({ - { CharacterSet({ {'a', 'd'} }), LexItemSet({ + { CharacterSet().include('a', 'd'), LexItemSet({ LexItem(Symbol(1), blank()) }) }, - { CharacterSet({ {'e', 'f'} }), LexItemSet({ + { CharacterSet().include('e', 'f'), LexItemSet({ LexItem(Symbol(1), blank()), LexItem(Symbol(2), blank()) }) }, - { CharacterSet({ {'g', 'x'} }), LexItemSet({ + { CharacterSet().include('g', 'x'), LexItemSet({ LexItem(Symbol(2), blank()) }) }, }))); }); diff --git a/spec/compiler/build_tables/merge_transitions_spec.cc b/spec/compiler/build_tables/merge_transitions_spec.cc index d035f712..48b7bbd0 100644 --- a/spec/compiler/build_tables/merge_transitions_spec.cc +++ b/spec/compiler/build_tables/merge_transitions_spec.cc @@ -6,7 +6,7 @@ using namespace build_tables; START_TEST -describe("merging character set transitions", []() { +describe("merge_char_transitions", []() { typedef map int_map; auto do_merge = [&](int_map *left, const pair &new_pair) { @@ -18,20 +18,20 @@ describe("merging character set transitions", []() { describe("when none of the transitions intersect", [&]() { it("returns the union of the two sets of transitions", [&]() { int_map map({ - { CharacterSet({ 'a', 'c' }), 1 }, - { CharacterSet({ 'x', 'y' }), 2 }, - { CharacterSet({ '1', '9' }), 4 }, + { CharacterSet().include('a').include('c'), 1 }, + { CharacterSet().include('x').include('y'), 2 }, + { CharacterSet().include('1').include('9'), 4 }, }); - do_merge(&map, { CharacterSet({ ' ' }), 8 }); - do_merge(&map, { CharacterSet({ '\t' }), 16 }); + do_merge(&map, { CharacterSet().include(' '), 8 }); + do_merge(&map, { CharacterSet().include('\t'), 16 }); AssertThat(map, Equals(int_map({ - { CharacterSet({ 'a', 'c' }), 1 }, - { CharacterSet({ 'x', 'y' }), 2 }, - { CharacterSet({ '1', '9' }), 4 }, - { CharacterSet({ ' ' }), 8 }, - { CharacterSet({ '\t' }), 16 }, + { CharacterSet().include('a').include('c'), 1 }, + { CharacterSet().include('x').include('y'), 2 }, + { CharacterSet().include('1').include('9'), 4 }, + { CharacterSet().include(' '), 8 }, + { CharacterSet().include('\t'), 16 }, }))); }); }); @@ -39,18 +39,33 @@ describe("merging character set transitions", []() { describe("when transitions intersect", [&]() { it("merges the intersecting transitions using the provided function", [&]() { int_map map({ - { CharacterSet({ {'a', 'f'}, {'A', 'F'} }), 1 }, - { CharacterSet({ {'0', '9'} }), 2 }, + { CharacterSet().include('a', 'f').include('A', 'F'), 1 }, + { CharacterSet().include('0', '9'), 2 }, }); - do_merge(&map, { CharacterSet({ 'c' }), 4 }); - do_merge(&map, { CharacterSet({ '3' }), 8 }); + do_merge(&map, { CharacterSet().include('c'), 4 }); + do_merge(&map, { CharacterSet().include('3'), 8 }); AssertThat(map, Equals(int_map({ - { CharacterSet({ {'a', 'b'}, {'d', 'f'}, {'A', 'F'} }), 1 }, - { CharacterSet({ {'c'} }), 5 }, - { CharacterSet({ {'0', '2'}, {'4', '9'} }), 2 }, - { CharacterSet({ '3' }), 10 }, + { + CharacterSet() + .include('a', 'b') + .include('d', 'f') + .include('A', 'F'), + 1 + }, + { + CharacterSet().include('c'), + 5 + }, + { + CharacterSet().include('0', '2').include('4', '9'), + 2 + }, + { + CharacterSet().include('3'), + 10 + }, }))); }); }); @@ -58,15 +73,15 @@ describe("merging character set transitions", []() { describe("when two of the right transitions intersect the same left transition", [&]() { it("splits the left-hand transition correctly", [&]() { int_map map1({ - { CharacterSet({ 'a', 'c' }), 1 }, + { CharacterSet().include('a').include('c'), 1 }, }); - do_merge(&map1, { CharacterSet({ 'a' }), 2 }); - do_merge(&map1, { CharacterSet({ 'c' }), 4 }); + do_merge(&map1, { CharacterSet().include('a'), 2 }); + do_merge(&map1, { CharacterSet().include('c'), 4 }); AssertThat(map1, Equals(int_map({ - { CharacterSet({ 'a' }), 3 }, - { CharacterSet({ 'c' }), 5 }, + { CharacterSet().include('a'), 3 }, + { CharacterSet().include('c'), 5 }, }))); }); }); diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc index d045f900..c0bb7e2e 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cc +++ b/spec/compiler/build_tables/rule_transitions_spec.cc @@ -8,7 +8,7 @@ using namespace build_tables; START_TEST -describe("rule transitions", []() { +describe("sym_transitions", []() { it("handles symbols", [&]() { AssertThat( sym_transitions(i_sym(1)), @@ -74,11 +74,26 @@ describe("rule transitions", []() { }))); }); + it("preserves metadata", [&]() { + map metadata_value({ + { PRECEDENCE, 5 } + }); + + rule_ptr rule = make_shared(seq({ i_sym(1), i_sym(2) }), metadata_value); + AssertThat( + sym_transitions(rule), + Equals(rule_map({ + { Symbol(1), make_shared(i_sym(2), metadata_value)}, + }))); + }); +}); + +describe("char_transitions", []() { it("handles characters", [&]() { AssertThat( char_transitions(character({ '1' })), Equals(rule_map({ - { CharacterSet({ '1' }), blank() } + { CharacterSet().include('1'), blank() } }))); }); @@ -92,9 +107,35 @@ describe("rule transitions", []() { character({ { 'm', 'z' } }), sym("y") }) })), Equals(rule_map({ - { CharacterSet({ {'a','l'} }), sym("x") }, - { CharacterSet({ {'m','s'} }), choice({ sym("x"), sym("y") }) }, - { CharacterSet({ {'t','z'} }), sym("y") }, + { CharacterSet().include('a','l'), sym("x") }, + { CharacterSet().include('m','s'), choice({ sym("x"), sym("y") }) }, + { CharacterSet().include('t','z'), sym("y") }, + }))); + }); + + it("handles choices between whitelisted and blacklisted character sets", [&]() { + AssertThat( + char_transitions(seq({ + choice({ + character({ '/' }, false), + seq({ + character({ '\\' }), + character({ '/' }) }) }), + character({ '/' }) })), + + Equals(rule_map({ + { CharacterSet() + .include_all() + .exclude('/') + .exclude('\\'), + character({ '/' }) }, + { CharacterSet() + .include('\\'), + seq({ + choice({ + blank(), + character({ '/' }) }), + character({ '/' }) }) }, }))); }); @@ -108,8 +149,8 @@ describe("rule transitions", []() { character({ { 'a', 'z' } }), sym("y") }) })), Equals(rule_map({ - { CharacterSet({ {'a', 'c'} }), choice({ sym("x"), sym("y") }) }, - { CharacterSet({ {'d', 'z'} }), sym("y") }, + { CharacterSet().include('a', 'c'), choice({ sym("x"), sym("y") }) }, + { CharacterSet().include('d', 'z'), sym("y") }, }))); AssertThat( @@ -121,10 +162,9 @@ describe("rule transitions", []() { character({ {'a', 'c'} }), sym("y") }) })), Equals(rule_map({ - { CharacterSet({ {'a', 'c'} }), choice({ sym("x"), sym("y") }) }, - { CharacterSet({ {'d', 'z'} }), sym("x") }, + { CharacterSet().include('a', 'c'), choice({ sym("x"), sym("y") }) }, + { CharacterSet().include('d', 'z'), sym("x") }, }))); - }); it("handles blanks", [&]() { @@ -137,7 +177,7 @@ describe("rule transitions", []() { char_transitions(rule), Equals(rule_map({ { - CharacterSet({ 'a' }), + CharacterSet().include('a'), seq({ character({ 'b' }), rule, @@ -148,41 +188,9 @@ describe("rule transitions", []() { AssertThat( char_transitions(rule), Equals(rule_map({ - { CharacterSet({ 'a' }), rule } + { CharacterSet().include('a'), rule } }))); }); - - it("preserves metadata", [&]() { - map metadata_value({ - { PRECEDENCE, 5 } - }); - - rule_ptr rule = make_shared(seq({ i_sym(1), i_sym(2) }), metadata_value); - AssertThat( - sym_transitions(rule), - Equals(rule_map({ - { Symbol(1), make_shared(i_sym(2), metadata_value)}, - }))); - }); - - describe("regression tests (somewhat redundant, should maybe be deleted later)", []() { - it("handles sequences that start with repeating characters", [&]() { - auto rule = seq({ - choice({ - repeat(character({ '"' }, false)), - blank(), - }), - character({ '"' }), - }); - - AssertThat(char_transitions(rule), Equals(rule_map({ - { CharacterSet({ '"' }).complement(), seq({ - repeat(character({ '"' }, false)), - character({ '"' }), }) }, - { CharacterSet({ '"' }), blank() }, - }))); - }); - }); }); END_TEST diff --git a/spec/compiler/helpers/containers.h b/spec/compiler/helpers/containers.h index c10dfc92..0757d5d5 100644 --- a/spec/compiler/helpers/containers.h +++ b/spec/compiler/helpers/containers.h @@ -44,7 +44,7 @@ class rule_list : public vector> { return true; } - rule_list(const initializer_list> &list) : + rule_list(const initializer_list> &list) : vector>(list) {} }; diff --git a/spec/compiler/helpers/rule_helpers.cc b/spec/compiler/helpers/rule_helpers.cc index 8c564020..60a7c616 100644 --- a/spec/compiler/helpers/rule_helpers.cc +++ b/spec/compiler/helpers/rule_helpers.cc @@ -9,14 +9,20 @@ namespace tree_sitter { namespace rules { rule_ptr character(const set &ranges) { - return make_shared(ranges); + return character(ranges, true); } rule_ptr character(const set &ranges, bool sign) { - if (sign) - return character(ranges); - else - return CharacterSet(ranges).complement().copy(); + CharacterSet result; + if (sign) { + for (auto &range : ranges) + result.include(range.min, range.max); + } else { + result.include_all(); + for (auto &range : ranges) + result.exclude(range.min, range.max); + } + return result.copy(); } rule_ptr i_sym(size_t index) { diff --git a/spec/compiler/prepare_grammar/expand_repeats_spec.cc b/spec/compiler/prepare_grammar/expand_repeats_spec.cc index f7e3db1c..9a4703cd 100644 --- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc +++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc @@ -71,7 +71,7 @@ describe("expanding repeat rules in a grammar", []() { AssertThat(match.rules, Equals(rule_list({ { "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) }, }))); - + AssertThat(match.aux_rules, Equals(rule_list({ { "rule0_repeat0", choice({ seq({ diff --git a/spec/compiler/prepare_grammar/parse_regex_spec.cc b/spec/compiler/prepare_grammar/parse_regex_spec.cc index 0b016a5b..743872b9 100644 --- a/spec/compiler/prepare_grammar/parse_regex_spec.cc +++ b/spec/compiler/prepare_grammar/parse_regex_spec.cc @@ -6,7 +6,7 @@ START_TEST using namespace rules; using prepare_grammar::parse_regex; -describe("parsing regex patterns", []() { +describe("parse_regex", []() { struct ValidInputRow { string description; string pattern; @@ -23,7 +23,7 @@ describe("parsing regex patterns", []() { { "'.' characters as wildcards", ".", - CharacterSet({'\n'}).complement().copy() + character({ '\n' }, false) }, { @@ -170,6 +170,19 @@ describe("parsing regex patterns", []() { blank() }) }) + }, + + { + "choices containing negated character classes", + "/([^/]|(\\\\/))*/", + seq({ + character({ '/' }), + repeat(choice({ + character({ '/' }, false), + seq({ character({ '\\' }), character({ '/' }) }), + })), + character({ '/' }), + }), } }; diff --git a/spec/compiler/rules/character_set_spec.cc b/spec/compiler/rules/character_set_spec.cc index 79154ddc..5d7def52 100644 --- a/spec/compiler/rules/character_set_spec.cc +++ b/spec/compiler/rules/character_set_spec.cc @@ -5,106 +5,327 @@ using namespace rules; START_TEST -describe("character sets", []() { - unsigned char max_char = 255; +describe("CharacterSet", []() { + describe("equality", [&]() { + it("returns true for identical character sets", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'd') + .include('f', 'm'); - describe("computing the complement", [&]() { - it("works for the set containing only the null character", [&]() { - CharacterSet set1({ '\0' }); - auto set2 = set1.complement(); - AssertThat(set2, Equals(CharacterSet({ - { 1, max_char } - }))); - AssertThat(set2.complement(), Equals(set1)); + CharacterSet set2 = CharacterSet() + .include('a', 'd') + .include('f', 'm'); + + AssertThat(set1, Equals(set2)); }); - it("works for single character sets", [&]() { - CharacterSet set1({ 'b' }); - auto set2 = set1.complement(); - AssertThat(set2, Equals(CharacterSet({ - { 0, 'a' }, - { 'c', max_char }, - }))); - AssertThat(set2.complement(), Equals(set1)); + it("returns false for character sets that include different ranges", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'd') + .include('f', 'm'); + + CharacterSet set2 = CharacterSet() + .include('a', 'c') + .include('f', 'm'); + + AssertThat(set1, !Equals(set2)); + AssertThat(set2, !Equals(set1)); + }); + + it("returns false for character sets that exclude different ranges", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'd') + .exclude('f', 'm'); + + CharacterSet set2 = CharacterSet() + .include_all() + .exclude('a', 'c') + .exclude('f', 'm'); + + AssertThat(set1, !Equals(set2)); + AssertThat(set2, !Equals(set1)); + }); + + it("returns false for character sets with different sign", [&]() { + CharacterSet set1 = CharacterSet().include_all(); + CharacterSet set2 = CharacterSet(); + + AssertThat(set1, !Equals(set2)); + AssertThat(set2, !Equals(set1)); }); }); - describe("computing unions", [&]() { - it("works for disjoint sets", [&]() { - CharacterSet set({ {'a', 'z'} }); - set.add_set(CharacterSet({ {'A', 'Z'} })); - AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'} }))); + describe("hashing", [&]() { + it("returns the same number for identical character sets", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'd') + .include('f', 'm'); + + CharacterSet set2 = CharacterSet() + .include('a', 'd') + .include('f', 'm'); + + AssertThat(set1.hash_code(), Equals(set2.hash_code())); }); - it("works for sets with adjacent ranges", [&]() { - CharacterSet set({ CharacterRange('a', 'r') }); - set.add_set(CharacterSet({ CharacterRange('s', 'z') })); - AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }))); + it("returns different numbers for character sets that include different ranges", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'd') + .include('f', 'm'); + + CharacterSet set2 = CharacterSet() + .include('a', 'c') + .include('f', 'm'); + + AssertThat(set1.hash_code(), !Equals(set2.hash_code())); + AssertThat(set2.hash_code(), !Equals(set1.hash_code())); }); - it("becomes the complete set when the complement is added", [&]() { - CharacterSet set({ 'c' }); - auto complement = set.complement(); - set.add_set(complement); - AssertThat(set, Equals(CharacterSet({ {0, max_char} }))); + it("returns different numbers for character sets that exclude different ranges", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'd') + .exclude('f', 'm'); + + CharacterSet set2 = CharacterSet() + .include_all() + .exclude('a', 'c') + .exclude('f', 'm'); + + AssertThat(set1.hash_code(), !Equals(set2.hash_code())); + AssertThat(set2.hash_code(), !Equals(set1.hash_code())); }); - it("works when the result becomes a continuous range", []() { - CharacterSet set({ {'a', 'd'}, {'f', 'z'} }); - set.add_set(CharacterSet({ {'c', 'g'} })); - AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }))); - }); + it("returns different numbers for character sets with different sign", [&]() { + CharacterSet set1 = CharacterSet().include_all(); + CharacterSet set2 = CharacterSet(); - it("does nothing for the set of all characters", [&]() { - CharacterSet set({ 'a' }); - set.add_set(set.complement()); - AssertThat(set, Equals(CharacterSet({ {'\0', max_char} }))); + AssertThat(set1.hash_code(), !Equals(set2.hash_code())); + AssertThat(set2.hash_code(), !Equals(set1.hash_code())); }); }); - describe("subtracting sets", []() { + describe("::is_empty", [&]() { + it("returns true for empty character sets", [&]() { + AssertThat(CharacterSet().is_empty(), Equals(true)); + }); + + it("returns false for full character sets", [&]() { + AssertThat(CharacterSet().include_all().is_empty(), Equals(false)); + }); + + it("returns false for character sets that include some characters", [&]() { + AssertThat(CharacterSet().include('x').is_empty(), Equals(false)); + }); + }); + + describe("::include", [&]() { + describe("when the set has a whitelist of characters", [&]() { + it("adds included characters", [&]() { + CharacterSet set1 = CharacterSet().include('a', 'd'); + AssertThat(set1, Equals(CharacterSet() + .include('a') + .include('b') + .include('c') + .include('d'))); + }); + }); + + describe("when the set has a blacklist of characters", [&]() { + it("removes excluded characters", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'g') + .include('c', 'e'); + AssertThat(set1, Equals(CharacterSet() + .include_all() + .exclude('a') + .exclude('b') + .exclude('f') + .exclude('g'))); + }); + + it("does nothing if the character are already not excluded", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .include('a', 'c'); + AssertThat(set1, Equals(CharacterSet().include_all())); + }); + }); + }); + + describe("::exclude", [&]() { + describe("when the set has a whitelist of characters", [&]() { + it("removes included characters", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'g') + .exclude('c', 'e'); + AssertThat(set1, Equals(CharacterSet() + .include('a') + .include('b') + .include('f') + .include('g'))); + }); + + it("does nothing if the character's are already not included", [&]() { + CharacterSet set1 = CharacterSet().exclude('a', 'c'); + AssertThat(set1, Equals(CharacterSet())); + }); + }); + + describe("when the set has a blacklist of characters", [&]() { + it("removes excluded characters", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'd'); + AssertThat(set1, Equals(CharacterSet() + .include_all() + .exclude('a') + .exclude('b') + .exclude('c') + .exclude('d'))); + }); + }); + }); + + describe("::remove_set", []() { CharacterSet intersection; - it("works for disjoint sets", [&]() { - CharacterSet set1({ {'a', 'z'} }); - intersection = set1.remove_set(CharacterSet({ {'A', 'Z'} })); - AssertThat(set1, Equals(CharacterSet({ {'a', 'z'} }))); - AssertThat(intersection, Equals(CharacterSet())); + describe("for a set with whitelisted characters", [&]() { + describe("when the subtractend has whitelisted characters", [&]() { + it("removes the included characters that the other set also includes", [&]() { + CharacterSet set1 = CharacterSet().include('a', 'z'); + set1.remove_set(CharacterSet().include('d', 's')); + AssertThat(set1, Equals(CharacterSet() + .include('a', 'c') + .include('t', 'z'))); + }); + + it("returns the characters that were removed", [&]() { + CharacterSet set1 = CharacterSet().include('a', 'z'); + intersection = set1.remove_set(CharacterSet().include('d', 's')); + AssertThat(intersection, Equals(CharacterSet() + .include('d', 's'))); + }); + + it("returns the empty set when the sets are disjoint", [&]() { + CharacterSet set1 = CharacterSet().include('a', 'z'); + intersection = set1.remove_set(CharacterSet().include('A', 'Z')); + AssertThat(set1, Equals(CharacterSet().include('a', 'z'))); + AssertThat(intersection, Equals(CharacterSet())); + }); + }); + + describe("when the subtractend has blacklisted characters", [&]() { + it("removes the included characters that are not excluded by the other set", [&]() { + CharacterSet set1 = CharacterSet().include('a', 'f'); + + intersection = set1.remove_set(CharacterSet() + .include_all() + .exclude('d', 'z')); + + AssertThat(set1, Equals(CharacterSet() + .include('d', 'f'))); + AssertThat(intersection, Equals(CharacterSet() + .include('a', 'c'))); + }); + }); }); - it("works when one set is a proper subset of the other", [&]() { - CharacterSet set1({ {'a','z'} }); - intersection = set1.remove_set(CharacterSet({ {'d', 's'} })); - AssertThat(set1, Equals(CharacterSet({ {'a', 'c'}, {'t', 'z'} }))); - AssertThat(intersection, Equals(CharacterSet({ {'d', 's'} }))); + describe("for a set with blacklisted characters", [&]() { + describe("when the subtractend has whitelisted characters", [&]() { + it("adds the subtractend's inclusions to the receiver's exclusions", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'f'); + + intersection = set1.remove_set(CharacterSet() + .include('x', 'z')); + + AssertThat(set1, Equals(CharacterSet() + .include_all() + .exclude('a', 'f') + .exclude('x', 'z'))); + + AssertThat(intersection, Equals(CharacterSet().include('x', 'z'))); + }); + }); + + describe("when the subtractend has blacklisted characters", [&]() { + it("includes only the characters excluded by the subtractend but not by the receiver", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'm'); + + set1.remove_set(CharacterSet() + .include_all() + .exclude('d', 'z')); + + AssertThat(set1, Equals(CharacterSet() + .include('n', 'z'))); + }); + + it("returns the characters excluded by neither set", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'm'); + + intersection = set1.remove_set(CharacterSet() + .include_all() + .exclude('d', 'z')); + + AssertThat(intersection, Equals(CharacterSet() + .include_all() + .exclude('a', 'z'))); + }); + + it("works when the sets are disjoint", [&]() { + CharacterSet set1 = CharacterSet() + .include_all() + .exclude('a', 'm'); + + intersection = set1.remove_set(CharacterSet() + .include_all() + .exclude('d', 'z')); + + AssertThat(set1, Equals(CharacterSet() + .include('n', 'z'))); + + AssertThat(intersection, Equals(CharacterSet() + .include_all() + .exclude('a', 'z'))); + }); + }); + }); + }); + + describe("::included_ranges", [&]() { + it("consolidates sequences of 3 or more consecutive characters into ranges", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'c') + .include('g') + .include('z'); + + AssertThat(set1.included_ranges(), Equals(vector({ + CharacterRange('a', 'c'), + CharacterRange('g'), + CharacterRange('z'), + }))); }); - it("works for a set that overlaps the right side", [&]() { - CharacterSet set1({ {'a','s'} }); - intersection = set1.remove_set(CharacterSet({ {'m', 'z'} })); - AssertThat(set1, Equals(CharacterSet({ {'a', 'l'} }))); - AssertThat(intersection, Equals(CharacterSet({ {'m', 's'} }))); - }); + it("doesn't consolidate sequences of 2 consecutive characters", [&]() { + CharacterSet set1 = CharacterSet() + .include('a', 'b') + .include('g') + .include('z'); - it("works for a set that overlaps the left side", [&]() { - CharacterSet set2({ {'m','z'} }); - intersection = set2.remove_set(CharacterSet({ {'a', 's'} })); - AssertThat(set2, Equals(CharacterSet({ {'t', 'z'} }))); - AssertThat(intersection, Equals(CharacterSet({ {'m', 's'} }))); - }); - - it("works for sets with multiple ranges", [&]() { - CharacterSet set1({ {'a', 'd'}, {'m', 'z'} }); - intersection = set1.remove_set(CharacterSet({ {'c', 'o'}, {'s', 'x'} })); - AssertThat(set1, Equals(CharacterSet({ {'a', 'b'}, {'p', 'r'}, {'y', 'z'} }))); - AssertThat(intersection, Equals(CharacterSet({ {'c', 'd'}, {'m', 'o'}, {'s', 'x'} }))); - }); - - it("works when the result is empty", [&]() { - CharacterSet set1({ 'd' }); - intersection = set1.remove_set(CharacterSet({ 'a', 'd', 'x' })); - AssertThat(set1, Equals(CharacterSet())); - AssertThat(intersection, Equals(CharacterSet({ 'd' }))); + AssertThat(set1.included_ranges(), Equals(vector({ + CharacterRange('a'), + CharacterRange('b'), + CharacterRange('g'), + CharacterRange('z'), + }))); }); }); }); diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index f376795f..76368952 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -38,8 +38,8 @@ class LexTableBuilder { if (symbol == rules::ERROR()) continue; else if (symbol == rules::END_OF_INPUT()) - result.insert( - LexItem(symbol, after_separators(CharacterSet({ 0 }).copy()))); + result.insert(LexItem( + symbol, after_separators(CharacterSet().include(0).copy()))); else if (symbol.is_token()) result.insert( LexItem(symbol, after_separators(lex_grammar.rule(symbol)))); @@ -52,9 +52,11 @@ class LexTableBuilder { if (pair == lex_state_ids.end()) { LexStateId state_id = lex_table.add_state(); lex_state_ids[item_set] = state_id; + add_accept_token_actions(item_set, state_id); add_advance_actions(item_set, state_id); add_token_start(item_set, state_id); + return state_id; } else { return pair->second; @@ -100,10 +102,10 @@ class LexTableBuilder { } CharacterSet separator_set() const { - set ranges; + CharacterSet result; for (char c : lex_grammar.separators) - ranges.insert(c); - return CharacterSet(ranges); + result.include(c); + return result; } rules::rule_ptr after_separators(rules::rule_ptr rule) { diff --git a/src/compiler/build_tables/merge_transitions.h b/src/compiler/build_tables/merge_transitions.h index 59bafde1..b7595a7f 100644 --- a/src/compiler/build_tables/merge_transitions.h +++ b/src/compiler/build_tables/merge_transitions.h @@ -68,7 +68,7 @@ void merge_char_transition(std::map *left, left->insert(pairs_to_insert.begin(), pairs_to_insert.end()); if (!new_char_set.is_empty()) - left->insert({ new_char_set, new_pair.second }); + left->insert({ new_char_set, new_value }); } } // namespace build_tables diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index cc331766..f9817693 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -245,12 +245,13 @@ class CCodeGenerator { } } - void condition_for_character_set(const rules::CharacterSet &set) { - if (set.ranges.size() == 1) { - add(condition_for_character_range(*set.ranges.begin())); + void condition_for_character_ranges( + const vector &ranges) { + if (ranges.size() == 1) { + add(condition_for_character_range(*ranges.begin())); } else { bool first = true; - for (auto &match : set.ranges) { + for (auto &match : ranges) { string part = "(" + condition_for_character_range(match) + ")"; if (first) { add(part); @@ -263,15 +264,13 @@ class CCodeGenerator { } } - void condition_for_character_rule(const rules::CharacterSet &rule) { - pair representation = - rule.most_compact_representation(); - if (representation.second) { - condition_for_character_set(representation.first); - } else { + void condition_for_character_set(const rules::CharacterSet &rule) { + if (rule.includes_all) { add("!("); - condition_for_character_set(rule.complement()); + condition_for_character_ranges(rule.excluded_ranges()); add(")"); + } else { + condition_for_character_ranges(rule.included_ranges()); } } @@ -319,7 +318,7 @@ class CCodeGenerator { line("START_TOKEN();"); for (auto pair : lex_state.actions) if (!pair.first.is_empty()) - _if([&]() { condition_for_character_rule(pair.first); }, + _if([&]() { condition_for_character_set(pair.first); }, [&]() { code_for_lex_actions(pair.second, expected_inputs); }); code_for_lex_actions(lex_state.default_action, expected_inputs); } diff --git a/src/compiler/prepare_grammar/expand_tokens.cc b/src/compiler/prepare_grammar/expand_tokens.cc index cd643aa5..2650f291 100644 --- a/src/compiler/prepare_grammar/expand_tokens.cc +++ b/src/compiler/prepare_grammar/expand_tokens.cc @@ -28,7 +28,7 @@ class ExpandTokens : public rules::IdentityRuleFn { rule_ptr apply_to(const String *rule) { vector elements; for (char val : rule->value) - elements.push_back(rules::CharacterSet({ val }).copy()); + elements.push_back(rules::CharacterSet().include(val).copy()); return rules::Seq::Build(elements); } diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc index cb4d4f8e..1c69a8e1 100644 --- a/src/compiler/prepare_grammar/parse_regex.cc +++ b/src/compiler/prepare_grammar/parse_regex.cc @@ -115,7 +115,7 @@ class PatternParser { case ']': { return error("unmatched close square bracket"); } case '.': { next(); - return { CharacterSet({ '\n' }).complement().copy(), nullptr }; + return { CharacterSet().include_all().exclude('\n').copy(), nullptr }; } default: { auto pair = single_char(); @@ -127,20 +127,24 @@ class PatternParser { } pair char_set() { + CharacterSet result; bool is_affirmative = true; if (peek() == '^') { next(); is_affirmative = false; + result.include_all(); } - CharacterSet result; + while (has_more_input() && (peek() != ']')) { auto pair = single_char(); if (pair.second) return { CharacterSet(), pair.second }; - result.add_set(pair.first); + if (is_affirmative) + result.add_set(pair.first); + else + result.remove_set(pair.first); } - if (!is_affirmative) - result = result.complement(); + return { result, nullptr }; } @@ -157,10 +161,10 @@ class PatternParser { next(); if (peek() == '-') { next(); - value = CharacterSet({ CharacterRange(first_char, peek()) }); + value = CharacterSet().include(first_char, peek()); next(); } else { - value = CharacterSet({ first_char }); + value = CharacterSet().include(first_char); } } return { value, nullptr }; @@ -169,19 +173,20 @@ class PatternParser { CharacterSet escaped_char(char value) { switch (value) { case 'a': - return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' } }); + return CharacterSet().include('a', 'z').include('A', 'Z'); case 'w': - return CharacterSet({ { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } }); + return CharacterSet().include('a', 'z').include('A', 'Z').include('0', + '9'); case 'd': - return CharacterSet({ { '0', '9' } }); + return CharacterSet().include('0', '9'); case 't': - return CharacterSet({ '\t' }); + return CharacterSet().include('\t'); case 'n': - return CharacterSet({ '\n' }); + return CharacterSet().include('\n'); case 'r': - return CharacterSet({ '\r' }); + return CharacterSet().include('\r'); default: - return CharacterSet({ value }); + return CharacterSet().include(value); } } diff --git a/src/compiler/rules/character_range.cc b/src/compiler/rules/character_range.cc index 3927af90..bff44033 100644 --- a/src/compiler/rules/character_range.cc +++ b/src/compiler/rules/character_range.cc @@ -5,6 +5,7 @@ namespace tree_sitter { namespace rules { +using std::ostream; using std::string; static const unsigned char MAX_CHAR = -1; @@ -53,5 +54,9 @@ string CharacterRange::to_string() const { return string() + escape_character(min) + "-" + escape_character(max); } +ostream &operator<<(ostream &stream, const CharacterRange &range) { + return stream << range.to_string(); +} + } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/character_range.h b/src/compiler/rules/character_range.h index ed0f0620..c11e6c75 100644 --- a/src/compiler/rules/character_range.h +++ b/src/compiler/rules/character_range.h @@ -20,6 +20,8 @@ struct CharacterRange { std::string to_string() const; }; +std::ostream &operator<<(std::ostream &stream, const CharacterRange &rule); + } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index 602d9325..5b0fec35 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -1,6 +1,7 @@ #include "compiler/rules/character_set.h" #include #include +#include #include "compiler/rules/visitor.h" namespace tree_sitter { @@ -9,32 +10,87 @@ namespace rules { using std::string; using std::hash; using std::set; -using std::pair; -using std::initializer_list; +using std::vector; -static const unsigned char MAX_CHAR = -1; +static void add_range(set *characters, CharacterRange range) { + for (uint32_t c = range.min; c <= range.max; c++) + characters->insert(c); +} -CharacterSet::CharacterSet() : ranges({}) {} -CharacterSet::CharacterSet(const set &ranges) - : ranges(ranges) {} -CharacterSet::CharacterSet(const initializer_list &ranges) - : ranges(ranges) {} +static void remove_range(set *characters, CharacterRange range) { + for (uint32_t c = range.min; c <= range.max; c++) + characters->erase(c); +} + +static set remove_chars(set *left, + const set &right) { + set result; + for (uint32_t c : right) { + if (left->erase(c)) + result.insert(c); + } + return result; +} + +static set add_chars(set *left, + const set &right) { + set result; + for (uint32_t c : right) + if (left->insert(c).second) + result.insert(c); + return result; +} + +static vector consolidate_ranges(const set &chars) { + vector result; + for (uint32_t c : chars) { + size_t size = result.size(); + if (size >= 2 && result[size - 2].max == (c - 2)) { + result.pop_back(); + result.back().max = c; + } else if (size >= 1) { + CharacterRange &last = result.back(); + if (last.min < last.max && last.max == (c - 1)) + last.max = c; + else + result.push_back(c); + } else { + result.push_back(c); + } + } + return result; +} + +CharacterSet::CharacterSet() + : includes_all(false), included_chars({}), excluded_chars({}) {} bool CharacterSet::operator==(const Rule &rule) const { const CharacterSet *other = dynamic_cast(&rule); - return other && (ranges == other->ranges); + return other && (includes_all == other->includes_all) && + (included_chars == other->included_chars) && + (excluded_chars == other->excluded_chars); } bool CharacterSet::operator<(const CharacterSet &other) const { - return ranges < other.ranges; + if (!includes_all && other.includes_all) + return true; + if (includes_all && !other.includes_all) + return false; + if (included_chars < other.included_chars) + return true; + if (other.included_chars < included_chars) + return false; + return excluded_chars < other.excluded_chars; } size_t CharacterSet::hash_code() const { - size_t result = std::hash()(ranges.size()); - for (auto &range : ranges) { - result ^= std::hash()(range.min); - result ^= std::hash()(range.max); - } + size_t result = hash()(includes_all); + result ^= hash()(included_chars.size()); + for (auto &c : included_chars) + result ^= hash()(c); + result ^= hash()(excluded_chars.size()); + for (auto &c : excluded_chars) + result ^= hash()(c); return result; } @@ -44,97 +100,88 @@ rule_ptr CharacterSet::copy() const { string CharacterSet::to_string() const { string result("(char"); - for (auto &range : ranges) - result += " " + range.to_string(); + if (includes_all) + result += " include_all"; + if (!included_chars.empty()) { + result += " (include"; + for (auto r : included_ranges()) + result += string(" ") + r.to_string(); + result += ")"; + } + if (!excluded_chars.empty()) { + result += " (exclude"; + for (auto r : excluded_ranges()) + result += string(" ") + r.to_string(); + result += ")"; + } return result + ")"; } -CharacterSet CharacterSet::complement() const { - CharacterSet result({ { 0, MAX_CHAR } }); - result.remove_set(*this); - return result; +CharacterSet &CharacterSet::include_all() { + includes_all = true; + return *this; } -std::pair CharacterSet::most_compact_representation() - const { - auto first_range = *ranges.begin(); - if (first_range.min == 0 && first_range.max > 0) { - return { this->complement(), false }; - } else { - return { *this, true }; - } +CharacterSet &CharacterSet::include(uint32_t min, uint32_t max) { + if (includes_all) + remove_range(&excluded_chars, CharacterRange(min, max)); + else + add_range(&included_chars, CharacterRange(min, max)); + return *this; } -void add_range(CharacterSet *self, CharacterRange addition) { - set new_ranges; - for (auto range : self->ranges) { - bool is_adjacent = false; - if (range.min < addition.min && range.max >= addition.min - 1) { - is_adjacent = true; - addition.min = range.min; - } - if (range.max > addition.max && range.min <= addition.max + 1) { - is_adjacent = true; - addition.max = range.max; - } - if (!is_adjacent) { - new_ranges.insert(range); - } - } - new_ranges.insert(addition); - self->ranges = new_ranges; +CharacterSet &CharacterSet::exclude(uint32_t min, uint32_t max) { + if (includes_all) + add_range(&excluded_chars, CharacterRange(min, max)); + else + remove_range(&included_chars, CharacterRange(min, max)); + return *this; } -CharacterSet remove_range(CharacterSet *self, CharacterRange range_to_remove) { - CharacterSet removed_set; - set new_ranges; - for (auto range : self->ranges) { - if (range_to_remove.min <= range.min) { - if (range_to_remove.max < range.min) { - new_ranges.insert(range); - } else if (range_to_remove.max < range.max) { - new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); - add_range(&removed_set, CharacterRange(range.min, range_to_remove.max)); - } else { - add_range(&removed_set, range); - } - } else if (range_to_remove.min <= range.max) { - if (range_to_remove.max < range.max) { - new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); - new_ranges.insert(CharacterRange(range_to_remove.max + 1, range.max)); - add_range(&removed_set, range_to_remove); - } else { - new_ranges.insert(CharacterRange(range.min, range_to_remove.min - 1)); - add_range(&removed_set, CharacterRange(range_to_remove.min, range.max)); - } - } else { - new_ranges.insert(range); - } - } - self->ranges = new_ranges; - return removed_set; -} +CharacterSet &CharacterSet::include(uint32_t c) { return include(c, c); } -bool CharacterSet::is_empty() const { return ranges.empty(); } +CharacterSet &CharacterSet::exclude(uint32_t c) { return exclude(c, c); } + +bool CharacterSet::is_empty() const { + return !includes_all && included_chars.empty(); +} void CharacterSet::add_set(const CharacterSet &other) { - for (auto &other_range : other.ranges) { - add_range(this, other_range); - } + for (uint32_t c : other.included_chars) + included_chars.insert(c); } CharacterSet CharacterSet::remove_set(const CharacterSet &other) { CharacterSet result; - for (auto &other_range : other.ranges) { - auto removed_set = remove_range(this, other_range); - result.add_set(removed_set); + if (includes_all) { + if (other.includes_all) { + result.includes_all = true; + result.excluded_chars = excluded_chars; + included_chars = add_chars(&result.excluded_chars, other.excluded_chars); + excluded_chars = {}; + includes_all = false; + } else { + result.included_chars = add_chars(&excluded_chars, other.included_chars); + } + } else { + if (other.includes_all) { + result.included_chars = included_chars; + included_chars = + remove_chars(&result.included_chars, other.excluded_chars); + } else { + result.included_chars = + remove_chars(&included_chars, other.included_chars); + } } return result; } -CharacterSet CharacterSet::intersect(const CharacterSet &set) const { - CharacterSet copy = *this; - return copy.remove_set(set); +vector CharacterSet::included_ranges() const { + return consolidate_ranges(included_chars); +} + +vector CharacterSet::excluded_ranges() const { + return consolidate_ranges(excluded_chars); } void CharacterSet::accept(Visitor *visitor) const { visitor->visit(this); } diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 6c71cf5f..9fed4aea 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -1,10 +1,11 @@ #ifndef COMPILER_RULES_CHARACTER_SET_H_ #define COMPILER_RULES_CHARACTER_SET_H_ -#include #include +#include #include #include +#include #include "compiler/rules/rule.h" #include "compiler/rules/character_range.h" @@ -14,8 +15,12 @@ namespace rules { class CharacterSet : public Rule { public: CharacterSet(); - explicit CharacterSet(const std::set &ranges); - explicit CharacterSet(const std::initializer_list &ranges); + + CharacterSet &include_all(); + CharacterSet &include(uint32_t c); + CharacterSet &include(uint32_t min, uint32_t max); + CharacterSet &exclude(uint32_t c); + CharacterSet &exclude(uint32_t min, uint32_t max); bool operator==(const Rule &other) const; bool operator<(const CharacterSet &) const; @@ -26,12 +31,14 @@ class CharacterSet : public Rule { void add_set(const CharacterSet &other); CharacterSet remove_set(const CharacterSet &other); - CharacterSet complement() const; - CharacterSet intersect(const CharacterSet &) const; - std::pair most_compact_representation() const; bool is_empty() const; - std::set ranges; + std::vector included_ranges() const; + std::vector excluded_ranges() const; + + bool includes_all; + std::set included_chars; + std::set excluded_chars; }; } // namespace rules