Properly merge transitions on overlapping character sets!

This commit is contained in:
Max Brunsfeld 2014-02-10 13:20:43 -08:00
parent 905a408998
commit 8baa1396fd
15 changed files with 330 additions and 207 deletions

View file

@ -30,6 +30,22 @@ describe("rule transitions", []() {
})));
});
// The two alternatives start with the ranges a-s and m-z, which overlap on m-s.
// The expected map splits them into three disjoint ranges: the part only the
// first alternative accepts, the shared part (where either continuation is
// possible), and the part only the second alternative accepts.
it("handles choices between overlapping character sets", [&]() {
AssertThat(
char_transitions(choice({
seq({
character({ {'a', 's'} }),
sym("x") }),
seq({
character({ { 'm', 'z' } }),
sym("y") }) })),
Equals(transition_map<CharacterSet, Rule>({
// a-l: only the first alternative matches.
{ character({ {'a','l'} }), sym("x") },
// m-s: both alternatives match, so the continuation is a choice.
{ character({ {'m','s'} }), choice({ sym("x"), sym("y") }) },
// t-z: only the second alternative matches.
{ character({ {'t','z'} }), sym("y") },
})));
});
it("handles sequences", [&]() {
AssertThat(
sym_transitions(seq({ symbol1, symbol2 })),

View file

@ -33,7 +33,13 @@ namespace test_grammars {
str("["),
comma_sep(sym("value")),
str("]"), }) },
{ "string", pattern("\"[^\"]+\"") },
{ "string", seq({
character('"'),
repeat(choice({
pattern("[^\"]"),
str("\\\""),
})),
character('"') }) },
{ "number", pattern("\\d+") }
});
}

View file

@ -2,28 +2,28 @@
#include <ctype.h>
enum ts_symbol {
ts_symbol_factor,
ts_aux_token1,
ts_symbol_plus,
ts_aux_token2,
ts_symbol_factor,
ts_symbol_variable,
ts_symbol_term,
ts_symbol_expression,
ts_aux_token1,
ts_symbol_number,
ts_symbol_times,
ts_symbol_term,
ts_symbol_variable,
ts_symbol_expression,
ts_aux_token2,
ts_symbol___END__,
};
static const char *ts_symbol_names[] = {
"factor",
"token1",
"plus",
"token2",
"factor",
"variable",
"term",
"expression",
"token1",
"number",
"times",
"term",
"variable",
"expression",
"token2",
"__END__",
};
@ -31,58 +31,58 @@ static void ts_lex(TSParser *parser) {
START_LEXER();
switch (LEX_STATE()) {
case 0:
if ((LOOKAHEAD_CHAR() == '\0'))
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(1, EXPECT({"<EOF>"}));
case 1:
ACCEPT_TOKEN(ts_symbol___END__);
case 2:
if ((LOOKAHEAD_CHAR() == '*'))
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
if ((LOOKAHEAD_CHAR() == '\0'))
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(2, EXPECT({"<EOF>", "*"}));
case 3:
ACCEPT_TOKEN(ts_symbol_times);
case 4:
if ((LOOKAHEAD_CHAR() == ')'))
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
LEX_ERROR(1, EXPECT({")"}));
case 5:
ACCEPT_TOKEN(ts_aux_token2);
case 6:
if ((LOOKAHEAD_CHAR() == ')'))
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
if ((LOOKAHEAD_CHAR() == '*'))
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
LEX_ERROR(1, EXPECT({")-*"}));
case 7:
if ((LOOKAHEAD_CHAR() == ')'))
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
if ((LOOKAHEAD_CHAR() == '*'))
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
if ((LOOKAHEAD_CHAR() == '+'))
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
LEX_ERROR(1, EXPECT({")-+"}));
case 8:
ACCEPT_TOKEN(ts_symbol_plus);
case 9:
if ((LOOKAHEAD_CHAR() == ')'))
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(5);
if ((LOOKAHEAD_CHAR() == '+'))
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
LEX_ERROR(2, EXPECT({")", "+"}));
case 10:
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
ADVANCE(13);
if ((LOOKAHEAD_CHAR() == '('))
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(12);
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
LEX_ERROR(4, EXPECT({"(", "0-9", "A-Z", "a-z"}));
case 11:
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
ACCEPT_TOKEN(ts_symbol_number);
case 12:
@ -93,17 +93,17 @@ static void ts_lex(TSParser *parser) {
ADVANCE(13);
ACCEPT_TOKEN(ts_symbol_variable);
case 14:
if ((LOOKAHEAD_CHAR() == '+'))
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
if ((LOOKAHEAD_CHAR() == '\0'))
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(2, EXPECT({"<EOF>", "+"}));
case 15:
if ((LOOKAHEAD_CHAR() == '*'))
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(3);
if ((LOOKAHEAD_CHAR() == '+'))
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(8);
if ((LOOKAHEAD_CHAR() == '\0'))
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(2, EXPECT({"<EOF>", "*-+"}));
default:
@ -118,16 +118,16 @@ static TSParseResult ts_parse(const char *input) {
case 0:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(45);
case ts_aux_token1:
SHIFT(42);
case ts_symbol_number:
SHIFT(41);
case ts_symbol_term:
SHIFT(2);
case ts_symbol_factor:
SHIFT(45);
case ts_symbol_variable:
SHIFT(41);
case ts_symbol_term:
SHIFT(2);
case ts_symbol_expression:
SHIFT(1);
default:
@ -188,14 +188,14 @@ static TSParseResult ts_parse(const char *input) {
case 6:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(32);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -250,14 +250,14 @@ static TSParseResult ts_parse(const char *input) {
case 11:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(23);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -280,14 +280,14 @@ static TSParseResult ts_parse(const char *input) {
case 13:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(14);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -354,14 +354,14 @@ static TSParseResult ts_parse(const char *input) {
case 19:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(20);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -450,14 +450,14 @@ static TSParseResult ts_parse(const char *input) {
case 28:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(29);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -542,14 +542,14 @@ static TSParseResult ts_parse(const char *input) {
case 37:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(38);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -584,10 +584,10 @@ static TSParseResult ts_parse(const char *input) {
case 41:
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_times:
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
case ts_symbol_plus:
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
case ts_symbol_times:
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
case ts_symbol___END__:
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
default:
@ -596,14 +596,14 @@ static TSParseResult ts_parse(const char *input) {
case 42:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(43);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:
@ -622,10 +622,10 @@ static TSParseResult ts_parse(const char *input) {
case 44:
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_times:
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
case ts_symbol_plus:
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
case ts_symbol_times:
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
case ts_symbol___END__:
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
default:
@ -634,10 +634,10 @@ static TSParseResult ts_parse(const char *input) {
case 45:
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol___END__:
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
case ts_symbol_plus:
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
case ts_symbol___END__:
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
case ts_symbol_times:
SHIFT(46);
default:
@ -670,14 +670,14 @@ static TSParseResult ts_parse(const char *input) {
case 48:
SET_LEX_STATE(10);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_factor:
SHIFT(16);
case ts_aux_token1:
SHIFT(13);
case ts_symbol_number:
SHIFT(12);
case ts_symbol_expression:
SHIFT(49);
case ts_symbol_factor:
SHIFT(16);
case ts_symbol_variable:
SHIFT(12);
case ts_symbol_term:

View file

@ -4,116 +4,154 @@
enum ts_symbol {
ts_aux_token6,
ts_symbol_number,
ts_symbol_object,
ts_aux_token5,
ts_aux_token7,
ts_aux_token4,
ts_aux_repeat_helper2,
ts_aux_token1,
ts_aux_token3,
ts_symbol_value,
ts_symbol_string,
ts_aux_token2,
ts_aux_token3,
ts_aux_token5,
ts_symbol_array,
ts_aux_repeat_helper1,
ts_aux_token7,
ts_aux_token4,
ts_symbol___END__,
ts_aux_token2,
ts_aux_repeat_helper2,
ts_aux_token1,
ts_symbol_object,
ts_symbol_value,
};
static const char *ts_symbol_names[] = {
"token6",
"number",
"object",
"token5",
"token7",
"token4",
"repeat_helper2",
"token1",
"token3",
"value",
"string",
"token2",
"token3",
"token5",
"array",
"repeat_helper1",
"token7",
"token4",
"__END__",
"token2",
"repeat_helper2",
"token1",
"object",
"value",
};
static void ts_lex(TSParser *parser) {
START_LEXER();
switch (LEX_STATE()) {
case 0:
if ((LOOKAHEAD_CHAR() == '\0'))
if (LOOKAHEAD_CHAR() == '\0')
ADVANCE(1);
LEX_ERROR(1, EXPECT({"<EOF>"}));
case 1:
ACCEPT_TOKEN(ts_symbol___END__);
case 2:
if ((LOOKAHEAD_CHAR() == ','))
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(3);
ACCEPT_TOKEN(ts_aux_token3);
case 3:
ACCEPT_TOKEN(ts_aux_token2);
case 4:
if ((LOOKAHEAD_CHAR() == ']'))
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(5);
LEX_ERROR(1, EXPECT({"]"}));
case 5:
ACCEPT_TOKEN(ts_aux_token4);
case 6:
if ((LOOKAHEAD_CHAR() == ']'))
if (LOOKAHEAD_CHAR() == ']')
ADVANCE(5);
if ((LOOKAHEAD_CHAR() == ','))
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(3);
LEX_ERROR(2, EXPECT({",", "]"}));
case 7:
if ((LOOKAHEAD_CHAR() == '}'))
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(8);
LEX_ERROR(1, EXPECT({"}"}));
case 8:
ACCEPT_TOKEN(ts_aux_token7);
case 9:
if ((LOOKAHEAD_CHAR() == '}'))
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(8);
if ((LOOKAHEAD_CHAR() == ','))
if (LOOKAHEAD_CHAR() == ',')
ADVANCE(3);
LEX_ERROR(2, EXPECT({",", "}"}));
case 10:
if ((LOOKAHEAD_CHAR() == '{'))
ADVANCE(16);
if ((LOOKAHEAD_CHAR() == '['))
ADVANCE(15);
if ((LOOKAHEAD_CHAR() == '\"'))
if (LOOKAHEAD_CHAR() == '[')
ADVANCE(18);
if (LOOKAHEAD_CHAR() == '{')
ADVANCE(19);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"}));
case 11:
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
ADVANCE(11);
ACCEPT_TOKEN(ts_symbol_number);
case 12:
if (!((LOOKAHEAD_CHAR() == '\"')))
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(14);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(13);
LEX_ERROR(2, EXPECT({"<EOF>-!", "#-<MAX>"}));
case 13:
if ((LOOKAHEAD_CHAR() == '\"'))
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(17);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(14);
if (!((LOOKAHEAD_CHAR() == '\"')))
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(13);
LEX_ERROR(1, EXPECT({"<ANY>"}));
case 14:
ACCEPT_TOKEN(ts_symbol_string);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(16);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(14);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(13);
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
ADVANCE(13);
LEX_ERROR(2, EXPECT({"<ANY>", "#-\""}));
case 15:
ACCEPT_TOKEN(ts_aux_token1);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(13);
LEX_ERROR(1, EXPECT({"\""}));
case 16:
ACCEPT_TOKEN(ts_aux_token5);
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(17);
if (LOOKAHEAD_CHAR() == '\\')
ADVANCE(14);
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
ADVANCE(15);
if (!((LOOKAHEAD_CHAR() == '\"') ||
(LOOKAHEAD_CHAR() == '\\')))
ADVANCE(13);
ACCEPT_TOKEN(ts_symbol_string);
case 17:
if ((LOOKAHEAD_CHAR() == ':'))
ADVANCE(18);
LEX_ERROR(1, EXPECT({":"}));
ACCEPT_TOKEN(ts_symbol_string);
case 18:
ACCEPT_TOKEN(ts_aux_token6);
ACCEPT_TOKEN(ts_aux_token1);
case 19:
if ((LOOKAHEAD_CHAR() == '\"'))
ACCEPT_TOKEN(ts_aux_token5);
case 20:
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(21);
LEX_ERROR(1, EXPECT({":"}));
case 21:
ACCEPT_TOKEN(ts_aux_token6);
case 22:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
LEX_ERROR(1, EXPECT({"\""}));
default:
@ -132,16 +170,16 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(53);
case ts_symbol_array:
SHIFT(53);
case ts_symbol_value:
SHIFT(1);
case ts_symbol_object:
SHIFT(53);
case ts_symbol_number:
SHIFT(53);
case ts_aux_token5:
SHIFT(47);
case ts_symbol_number:
SHIFT(53);
case ts_aux_token1:
SHIFT(2);
case ts_symbol_value:
SHIFT(1);
default:
PARSE_PANIC();
}
@ -160,14 +198,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(44);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_value:
SHIFT(44);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -180,14 +218,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(4);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_value:
SHIFT(4);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -230,14 +268,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(43);
case ts_symbol_array:
SHIFT(43);
case ts_symbol_value:
SHIFT(41);
case ts_symbol_object:
SHIFT(43);
case ts_symbol_number:
SHIFT(43);
case ts_symbol_value:
SHIFT(41);
case ts_aux_token5:
SHIFT(35);
case ts_symbol_number:
SHIFT(43);
case ts_aux_token1:
SHIFT(8);
default:
@ -250,14 +288,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(9);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_value:
SHIFT(9);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -294,7 +332,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 12:
SET_LEX_STATE(19);
SET_LEX_STATE(22);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(13);
@ -302,7 +340,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 13:
SET_LEX_STATE(17);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token6:
SHIFT(14);
@ -316,14 +354,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(15);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_symbol_value:
SHIFT(15);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -360,7 +398,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 18:
SET_LEX_STATE(19);
SET_LEX_STATE(22);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(19);
@ -368,7 +406,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 19:
SET_LEX_STATE(17);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token6:
SHIFT(20);
@ -382,14 +420,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(34);
case ts_symbol_array:
SHIFT(34);
case ts_symbol_value:
SHIFT(32);
case ts_symbol_object:
SHIFT(34);
case ts_symbol_number:
SHIFT(34);
case ts_symbol_value:
SHIFT(32);
case ts_aux_token5:
SHIFT(26);
case ts_symbol_number:
SHIFT(34);
case ts_aux_token1:
SHIFT(21);
default:
@ -402,14 +440,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(22);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_value:
SHIFT(22);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -456,7 +494,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 26:
SET_LEX_STATE(19);
SET_LEX_STATE(22);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(27);
@ -464,7 +502,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 27:
SET_LEX_STATE(17);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token6:
SHIFT(28);
@ -478,14 +516,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(29);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_symbol_value:
SHIFT(29);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -524,10 +562,10 @@ static TSParseResult ts_parse(const char *input) {
case 32:
SET_LEX_STATE(9);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token2:
SHIFT(18);
case ts_aux_token7:
REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0}));
case ts_aux_token2:
SHIFT(18);
case ts_aux_repeat_helper1:
SHIFT(33);
default:
@ -552,7 +590,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 35:
SET_LEX_STATE(19);
SET_LEX_STATE(22);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(36);
@ -560,7 +598,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 36:
SET_LEX_STATE(17);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token6:
SHIFT(37);
@ -574,14 +612,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(38);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_symbol_value:
SHIFT(38);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:
@ -620,10 +658,10 @@ static TSParseResult ts_parse(const char *input) {
case 41:
SET_LEX_STATE(6);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token4:
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
case ts_aux_token2:
SHIFT(7);
case ts_aux_token4:
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
case ts_aux_repeat_helper2:
SHIFT(42);
default:
@ -676,7 +714,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 47:
SET_LEX_STATE(19);
SET_LEX_STATE(22);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(48);
@ -684,7 +722,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_PANIC();
}
case 48:
SET_LEX_STATE(17);
SET_LEX_STATE(20);
switch (LOOKAHEAD_SYM()) {
case ts_aux_token6:
SHIFT(49);
@ -698,14 +736,14 @@ static TSParseResult ts_parse(const char *input) {
SHIFT(25);
case ts_symbol_array:
SHIFT(25);
case ts_symbol_value:
SHIFT(50);
case ts_symbol_object:
SHIFT(25);
case ts_symbol_number:
SHIFT(25);
case ts_symbol_value:
SHIFT(50);
case ts_aux_token5:
SHIFT(12);
case ts_symbol_number:
SHIFT(25);
case ts_aux_token1:
SHIFT(3);
default:

View file

@ -14,7 +14,7 @@ describe("json", []() {
});
it("parses strings", [&]() {
TSDocumentSetText(document, "\"this is a string\"");
TSDocumentSetText(document, "\"this is a \\\"string\\\" \"");
AssertThat(string(TSDocumentToString(document)), Equals("(value (string))"));
});

View file

@ -1,8 +1,8 @@
#include "item_set_transitions.h"
#include "item_set_closure.h"
#include "rule_transitions.h"
#include "merge_transitions.h"
using std::dynamic_pointer_cast;
using std::make_shared;
using std::shared_ptr;
@ -19,7 +19,7 @@ namespace tree_sitter {
transition_map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
transition_map<CharacterSet, LexItemSet> result;
for (LexItem item : item_set) {
for (const LexItem &item : item_set) {
transition_map<CharacterSet, LexItemSet> item_transitions;
for (auto transition : char_transitions(item.rule)) {
auto rule = transition.first;
@ -28,7 +28,7 @@ namespace tree_sitter {
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
}
result.merge(item_transitions, [](shared_ptr<const LexItemSet> left, shared_ptr<const LexItemSet> right) -> shared_ptr<const LexItemSet> {
result = merge_char_transitions<LexItemSet>(result, item_transitions, [](shared_ptr<LexItemSet> left, shared_ptr<LexItemSet> right) {
return make_shared<LexItemSet>(merge_sets(*left, *right));
});
}
@ -38,7 +38,7 @@ namespace tree_sitter {
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) {
transition_map<rules::Symbol, ParseItemSet> result;
for (ParseItem item : item_set) {
for (const ParseItem &item : item_set) {
transition_map<rules::Symbol, ParseItemSet> item_transitions;
for (auto transition : sym_transitions(item.rule)) {
auto rule = transition.first;
@ -49,7 +49,7 @@ namespace tree_sitter {
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
}
result.merge(item_transitions, [](shared_ptr<const ParseItemSet> left, shared_ptr<const ParseItemSet> right) -> shared_ptr<const ParseItemSet> {
result = merge_sym_transitions<ParseItemSet>(result, item_transitions, [](shared_ptr<ParseItemSet> left, shared_ptr<ParseItemSet> right) {
return make_shared<ParseItemSet>(merge_sets(*left, *right));
});
}

View file

@ -0,0 +1,56 @@
#ifndef __tree_sitter__merge_transitions__
#define __tree_sitter__merge_transitions__
#include "transition_map.h"
#include "character_set.h"
#include "symbol.h"
namespace tree_sitter {
namespace build_tables {
/**
 * Merges two symbol-keyed transition maps into a new map.
 *
 * The result starts as a copy of `left`. Each entry of `right` is then either
 * combined (via `merge_fn(existing_value, incoming_value)`) into the first
 * entry of the result whose key compares equal to it, or appended as a new
 * entry when no such key exists.
 *
 * @param left      map whose entries seed the result
 * @param right     map whose entries are merged in, in iteration order
 * @param merge_fn  combines the values of two entries that share a key
 * @return the merged map
 */
template<typename T>
transition_map<rules::Symbol, T>
merge_sym_transitions(const transition_map<rules::Symbol, T> &left,
                      const transition_map<rules::Symbol, T> &right,
                      std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
    transition_map<rules::Symbol, T> combined(left);
    for (auto &incoming : right) {
        // Find the value slot of the first entry keyed by an equal symbol.
        std::shared_ptr<T> *slot = nullptr;
        for (auto &candidate : combined) {
            if (candidate.first->operator==(*incoming.first)) {
                slot = &candidate.second;
                break;
            }
        }

        if (slot != nullptr) {
            // Same key on both sides: fold the two values together in place.
            *slot = merge_fn(*slot, incoming.second);
        } else {
            // Key only present on the right: carry the entry over unchanged.
            combined.add(incoming.first, incoming.second);
        }
    }
    return combined;
}
/**
 * Merges two character-set-keyed transition maps into a new map whose keys
 * are split where the inputs overlap.
 *
 * For every entry of `right`, the characters it shares with an entry of
 * `left` are carved out of both keys and added as a separate entry whose
 * value is `merge_fn(left_value, right_value)`. Whatever is left of the
 * right-hand key after all overlaps are removed is added with the right-hand
 * value.
 *
 * NOTE(review): the trimming of the left-hand keys is done through the key
 * objects of `left`; this relies on `result` (a copy of `left`) sharing those
 * key objects, so `left`'s keys are narrowed as a side effect. Callers in view
 * only pass temporaries / maps they immediately overwrite - confirm before
 * reusing `left` afterwards.
 *
 * NOTE(review): a left-hand key that is completely covered by a right-hand
 * key stays in the result as an empty set; transition_map exposes no way to
 * erase it from here.
 *
 * @param left      map whose entries seed the result
 * @param right     map whose entries are merged in; its keys are not modified
 * @param merge_fn  combines the values of two entries whose keys overlap
 * @return the merged map
 */
template<typename T>
transition_map<rules::CharacterSet, T>
merge_char_transitions(const transition_map<rules::CharacterSet, T> &left,
                       const transition_map<rules::CharacterSet, T> &right,
                       std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
    transition_map<rules::CharacterSet, T> result(left);
    for (auto &pair : right) {
        // Trim a private copy of the incoming key so that the caller's
        // `right` map is not mutated through the shared pointer.
        auto rule = std::make_shared<rules::CharacterSet>(*pair.first);
        for (auto &existing_pair : left) {
            auto existing_rule = existing_pair.first;
            // remove_set strips the shared characters from the existing key
            // and hands back exactly the characters it removed.
            auto intersection = existing_rule->remove_set(*rule);
            if (!intersection.is_empty()) {
                // The shared characters get their own entry, so drop them
                // from the incoming key as well.
                rule->remove_set(intersection);
                result.add(std::make_shared<rules::CharacterSet>(intersection), merge_fn(existing_pair.second, pair.second));
            }
        }
        // Only keep the remainder if some characters are still unclaimed;
        // an empty set would be a transition that can never be taken.
        if (!rule->is_empty())
            result.add(rule, pair.second);
    }
    return result;
}
}
}
#endif

View file

@ -1,5 +1,6 @@
#include "rule_transitions.h"
#include "rules.h"
#include "merge_transitions.h"
using namespace tree_sitter::rules;
@ -9,6 +10,23 @@ namespace tree_sitter {
return typeid(*rule) == typeid(Blank);
}
// Merges two rule-valued transition maps. When both maps have a transition on
// the same (or, for character sets, an overlapping) key, the two target rules
// are combined into a choice between them. Only the two specializations below
// are defined.
template<typename T>
transition_map<T, Rule> merge_transitions(const transition_map<T, Rule> &left, const transition_map<T, Rule> &right);

// Character-set keys: delegates to merge_char_transitions.
template<>
transition_map<CharacterSet, Rule> merge_transitions(const transition_map<CharacterSet, Rule> &left, const transition_map<CharacterSet, Rule> &right) {
    auto either = [](rule_ptr first, rule_ptr second) -> rule_ptr {
        return choice({ first, second });
    };
    return merge_char_transitions<Rule>(left, right, either);
}

// Symbol keys: delegates to merge_sym_transitions.
template<>
transition_map<Symbol, Rule> merge_transitions(const transition_map<Symbol, Rule> &left, const transition_map<Symbol, Rule> &right) {
    auto either = [](rule_ptr first, rule_ptr second) -> rule_ptr {
        return choice({ first, second });
    };
    return merge_sym_transitions<Rule>(left, right, either);
}
template<typename T>
class TransitionsVisitor : public rules::Visitor {
public:
@ -23,7 +41,7 @@ namespace tree_sitter {
void visit_atom(const Rule *rule) {
auto atom = dynamic_cast<const T *>(rule);
if (atom) {
value = transition_map<T, Rule>({{ std::make_shared<const T>(*atom), blank() }});
value = transition_map<T, Rule>({{ std::make_shared<T>(*atom), blank() }});
}
}
@ -37,9 +55,7 @@ namespace tree_sitter {
void visit(const Choice *rule) {
value = transitions(rule->left);
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
value = merge_transitions<T>(transitions(rule->left), transitions(rule->right));
}
void visit(const Seq *rule) {
@ -50,9 +66,7 @@ namespace tree_sitter {
return seq({ left_rule, rule->right });
});
if (rule_can_be_blank(rule->left)) {
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
value = merge_transitions<T>(value, transitions(rule->right));
}
}

View file

@ -8,13 +8,14 @@
namespace tree_sitter {
template<typename TKey, typename TValue>
class transition_map {
typedef std::shared_ptr<const TKey> TKeyPtr;
typedef std::shared_ptr<const TValue> TValuePtr;
typedef std::pair<const TKeyPtr, TValuePtr> pair_type;
typedef std::shared_ptr<TKey> TKeyPtr;
typedef std::shared_ptr<TValue> TValuePtr;
typedef std::pair<TKeyPtr, TValuePtr> pair_type;
typedef std::vector<pair_type> contents_type;
contents_type contents;
public:
transition_map() : contents(contents_type()) {};
transition_map(std::vector<pair_type> pairs) : contents(pairs) {};
@ -33,15 +34,6 @@ namespace tree_sitter {
contents.push_back(pair_type(key, value));
}
void merge(const transition_map<TKey, TValue> &other, std::function<TValuePtr(TValuePtr, TValuePtr)> merge_fn) {
for (pair_type other_pair : other) {
if (pair_type *current_pair = pair_for_key(*other_pair.first))
current_pair->second = merge_fn(current_pair->second, other_pair.second);
else
add(other_pair.first, other_pair.second);
}
}
TValuePtr operator[](const TKey &key) const {
for (auto pair : *this) {
if (*pair.first == key) {
@ -52,7 +44,7 @@ namespace tree_sitter {
}
template<typename NewV>
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<const NewV>(TValuePtr)> map_fn) {
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<NewV>(TValuePtr)> map_fn) {
transition_map<TKey, NewV> result;
for (pair_type pair : *this) {
auto new_value = map_fn(pair.second);
@ -70,18 +62,6 @@ namespace tree_sitter {
const_iterator begin() const { return contents.begin(); }
const_iterator end() const { return contents.end(); }
size_t size() const { return contents.size(); }
private:
pair_type * pair_for_key(const TKey &key) {
for (int i = 0; i < contents.size(); i++) {
pair_type *pair = &contents[i];
if (*pair->first == key) return pair;
}
return NULL;
}
contents_type contents;
};
template<typename K, typename V>

View file

@ -98,6 +98,8 @@ namespace tree_sitter {
return "\\0";
case '"':
return "\\\"";
case '\\':
return "\\\\";
default:
return string() + character;
}
@ -108,16 +110,20 @@ namespace tree_sitter {
if (range.min == range.max) {
return lookahead + " == '" + character_code(range.min) + "'";
} else {
return string("'") + range.min + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + range.max + "'";
return string("'") + character_code(range.min) + string("' <= ") + lookahead +
" && " + lookahead + " <= '" + character_code(range.max) + "'";
}
}
string condition_for_character_set(const rules::CharacterSet &set) {
vector<string> parts;
for (auto &match : set.ranges)
parts.push_back("(" + condition_for_character_range(match) + ")");
return join(parts, " ||\n ");
if (set.ranges.size() == 1) {
return condition_for_character_range(*set.ranges.begin());
} else {
for (auto &match : set.ranges)
parts.push_back("(" + condition_for_character_range(match) + ")");
return join(parts, " ||\n ");
}
}
string condition_for_character_rule(const rules::CharacterSet &rule) {

View file

@ -154,6 +154,10 @@ namespace tree_sitter {
return removed_set;
}
// Returns true when this set holds no character ranges.
bool CharacterSet::is_empty() const {
return ranges.empty();
}
void CharacterSet::add_set(const CharacterSet &other) {
for (auto &other_range : other.ranges) {
add_range(this, other_range);

View file

@ -38,6 +38,7 @@ namespace tree_sitter {
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
@ -51,7 +52,7 @@ namespace tree_sitter {
std::set<CharacterRange> ranges;
};
typedef std::shared_ptr<const CharacterSet> char_ptr;
typedef std::shared_ptr<CharacterSet> char_ptr;
}
}

View file

@ -8,7 +8,7 @@ namespace tree_sitter {
class Visitor;
class Rule;
typedef std::shared_ptr<const Rule> rule_ptr;
typedef std::shared_ptr<Rule> rule_ptr;
class Rule {
public:

View file

@ -23,7 +23,7 @@ namespace tree_sitter {
bool is_auxiliary;
};
typedef std::shared_ptr<const Symbol> sym_ptr;
typedef std::shared_ptr<Symbol> sym_ptr;
}
}

View file

@ -101,6 +101,7 @@
125120A218307FFD00C9B56A /* test_grammars.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = test_grammars.h; path = spec/fixtures/grammars/test_grammars.h; sourceTree = SOURCE_ROOT; };
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
12661BF318A1505A00A259FB /* character_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = character_set_spec.cpp; sourceTree = SOURCE_ROOT; };
127528AF18A6F9C6006B682B /* merge_transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = merge_transitions.h; sourceTree = "<group>"; };
12AB465D188BD03E00DE79DF /* follow_sets.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = follow_sets.cpp; sourceTree = "<group>"; };
12AB465E188BD03E00DE79DF /* follow_sets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = follow_sets.h; sourceTree = "<group>"; };
12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_closure_spec.cpp; sourceTree = "<group>"; };
@ -228,6 +229,7 @@
12EDCFBF18820880005A7A07 /* item_set_closure.h */,
12EDCFC118820A70005A7A07 /* item_set_transitions.cpp */,
12EDCFC218820A70005A7A07 /* item_set_transitions.h */,
127528AF18A6F9C6006B682B /* merge_transitions.h */,
12EDCFA418820137005A7A07 /* perform.cpp */,
12EDCFA518820137005A7A07 /* perform.h */,
12EDCFA618820137005A7A07 /* rule_transitions.cpp */,