Properly merge transitions on overlapping character sets!
This commit is contained in:
parent
905a408998
commit
8baa1396fd
15 changed files with 330 additions and 207 deletions
|
|
@ -30,6 +30,22 @@ describe("rule transitions", []() {
|
|||
})));
|
||||
});
|
||||
|
||||
// Spec for a choice whose two alternatives begin with overlapping character
// ranges ('a'-'s' leading to x, 'm'-'z' leading to y). The assertion below
// expects char_transitions to return three disjoint ranges:
//   'a'-'l' -> x, 'm'-'s' -> choice(x, y), 't'-'z' -> y.
it("handles choices between overlapping character sets", [&]() {
|
||||
AssertThat(
|
||||
char_transitions(choice({
|
||||
seq({
|
||||
character({ {'a', 's'} }),
|
||||
sym("x") }),
|
||||
seq({
|
||||
character({ { 'm', 'z' } }),
|
||||
sym("y") }) })),
|
||||
Equals(transition_map<CharacterSet, Rule>({
|
||||
{ character({ {'a','l'} }), sym("x") },
|
||||
{ character({ {'m','s'} }), choice({ sym("x"), sym("y") }) },
|
||||
{ character({ {'t','z'} }), sym("y") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles sequences", [&]() {
|
||||
AssertThat(
|
||||
sym_transitions(seq({ symbol1, symbol2 })),
|
||||
|
|
|
|||
8
spec/fixtures/grammars/json.cpp
vendored
8
spec/fixtures/grammars/json.cpp
vendored
|
|
@ -33,7 +33,13 @@ namespace test_grammars {
|
|||
str("["),
|
||||
comma_sep(sym("value")),
|
||||
str("]"), }) },
|
||||
{ "string", pattern("\"[^\"]+\"") },
|
||||
{ "string", seq({
|
||||
character('"'),
|
||||
repeat(choice({
|
||||
pattern("[^\"]"),
|
||||
str("\\\""),
|
||||
})),
|
||||
character('"') }) },
|
||||
{ "number", pattern("\\d+") }
|
||||
});
|
||||
}
|
||||
|
|
|
|||
114
spec/fixtures/parsers/arithmetic.c
vendored
114
spec/fixtures/parsers/arithmetic.c
vendored
|
|
@ -2,28 +2,28 @@
|
|||
#include <ctype.h>
|
||||
|
||||
enum ts_symbol {
|
||||
ts_symbol_factor,
|
||||
ts_aux_token1,
|
||||
ts_symbol_plus,
|
||||
ts_aux_token2,
|
||||
ts_symbol_factor,
|
||||
ts_symbol_variable,
|
||||
ts_symbol_term,
|
||||
ts_symbol_expression,
|
||||
ts_aux_token1,
|
||||
ts_symbol_number,
|
||||
ts_symbol_times,
|
||||
ts_symbol_term,
|
||||
ts_symbol_variable,
|
||||
ts_symbol_expression,
|
||||
ts_aux_token2,
|
||||
ts_symbol___END__,
|
||||
};
|
||||
|
||||
static const char *ts_symbol_names[] = {
|
||||
"factor",
|
||||
"token1",
|
||||
"plus",
|
||||
"token2",
|
||||
"factor",
|
||||
"variable",
|
||||
"term",
|
||||
"expression",
|
||||
"token1",
|
||||
"number",
|
||||
"times",
|
||||
"term",
|
||||
"variable",
|
||||
"expression",
|
||||
"token2",
|
||||
"__END__",
|
||||
};
|
||||
|
||||
|
|
@ -31,58 +31,58 @@ static void ts_lex(TSParser *parser) {
|
|||
START_LEXER();
|
||||
switch (LEX_STATE()) {
|
||||
case 0:
|
||||
if ((LOOKAHEAD_CHAR() == '\0'))
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(1, EXPECT({"<EOF>"}));
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_symbol___END__);
|
||||
case 2:
|
||||
if ((LOOKAHEAD_CHAR() == '*'))
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(3);
|
||||
if ((LOOKAHEAD_CHAR() == '\0'))
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(2, EXPECT({"<EOF>", "*"}));
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_symbol_times);
|
||||
case 4:
|
||||
if ((LOOKAHEAD_CHAR() == ')'))
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(5);
|
||||
LEX_ERROR(1, EXPECT({")"}));
|
||||
case 5:
|
||||
ACCEPT_TOKEN(ts_aux_token2);
|
||||
case 6:
|
||||
if ((LOOKAHEAD_CHAR() == ')'))
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(5);
|
||||
if ((LOOKAHEAD_CHAR() == '*'))
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR(1, EXPECT({")-*"}));
|
||||
case 7:
|
||||
if ((LOOKAHEAD_CHAR() == ')'))
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(5);
|
||||
if ((LOOKAHEAD_CHAR() == '*'))
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(3);
|
||||
if ((LOOKAHEAD_CHAR() == '+'))
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR(1, EXPECT({")-+"}));
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_symbol_plus);
|
||||
case 9:
|
||||
if ((LOOKAHEAD_CHAR() == ')'))
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(5);
|
||||
if ((LOOKAHEAD_CHAR() == '+'))
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR(2, EXPECT({")", "+"}));
|
||||
case 10:
|
||||
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
|
||||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
|
||||
ADVANCE(13);
|
||||
if ((LOOKAHEAD_CHAR() == '('))
|
||||
if (LOOKAHEAD_CHAR() == '(')
|
||||
ADVANCE(12);
|
||||
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR(4, EXPECT({"(", "0-9", "A-Z", "a-z"}));
|
||||
case 11:
|
||||
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(11);
|
||||
ACCEPT_TOKEN(ts_symbol_number);
|
||||
case 12:
|
||||
|
|
@ -93,17 +93,17 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_symbol_variable);
|
||||
case 14:
|
||||
if ((LOOKAHEAD_CHAR() == '+'))
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(8);
|
||||
if ((LOOKAHEAD_CHAR() == '\0'))
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(2, EXPECT({"<EOF>", "+"}));
|
||||
case 15:
|
||||
if ((LOOKAHEAD_CHAR() == '*'))
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(3);
|
||||
if ((LOOKAHEAD_CHAR() == '+'))
|
||||
if (LOOKAHEAD_CHAR() == '+')
|
||||
ADVANCE(8);
|
||||
if ((LOOKAHEAD_CHAR() == '\0'))
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(2, EXPECT({"<EOF>", "*-+"}));
|
||||
default:
|
||||
|
|
@ -118,16 +118,16 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 0:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(45);
|
||||
case ts_aux_token1:
|
||||
SHIFT(42);
|
||||
case ts_symbol_number:
|
||||
SHIFT(41);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(45);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(41);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(1);
|
||||
default:
|
||||
|
|
@ -188,14 +188,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 6:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(32);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -250,14 +250,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 11:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(23);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -280,14 +280,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 13:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(14);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -354,14 +354,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 19:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(20);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -450,14 +450,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 28:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(29);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -542,14 +542,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 37:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(38);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -584,10 +584,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 41:
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_times:
|
||||
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
|
||||
case ts_symbol_plus:
|
||||
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
|
||||
case ts_symbol_times:
|
||||
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
|
||||
case ts_symbol___END__:
|
||||
REDUCE(ts_symbol_factor, 1, COLLAPSE({0}));
|
||||
default:
|
||||
|
|
@ -596,14 +596,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 42:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(43);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -622,10 +622,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 44:
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_times:
|
||||
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
|
||||
case ts_symbol_plus:
|
||||
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
|
||||
case ts_symbol_times:
|
||||
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
|
||||
case ts_symbol___END__:
|
||||
REDUCE(ts_symbol_factor, 3, COLLAPSE({1, 0, 1}));
|
||||
default:
|
||||
|
|
@ -634,10 +634,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 45:
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol___END__:
|
||||
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
|
||||
case ts_symbol_plus:
|
||||
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
|
||||
case ts_symbol___END__:
|
||||
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
|
||||
case ts_symbol_times:
|
||||
SHIFT(46);
|
||||
default:
|
||||
|
|
@ -670,14 +670,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 48:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(49);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
|
|||
242
spec/fixtures/parsers/json.c
vendored
242
spec/fixtures/parsers/json.c
vendored
|
|
@ -4,116 +4,154 @@
|
|||
enum ts_symbol {
|
||||
ts_aux_token6,
|
||||
ts_symbol_number,
|
||||
ts_symbol_object,
|
||||
ts_aux_token5,
|
||||
ts_aux_token7,
|
||||
ts_aux_token4,
|
||||
ts_aux_repeat_helper2,
|
||||
ts_aux_token1,
|
||||
ts_aux_token3,
|
||||
ts_symbol_value,
|
||||
ts_symbol_string,
|
||||
ts_aux_token2,
|
||||
ts_aux_token3,
|
||||
ts_aux_token5,
|
||||
ts_symbol_array,
|
||||
ts_aux_repeat_helper1,
|
||||
ts_aux_token7,
|
||||
ts_aux_token4,
|
||||
ts_symbol___END__,
|
||||
ts_aux_token2,
|
||||
ts_aux_repeat_helper2,
|
||||
ts_aux_token1,
|
||||
ts_symbol_object,
|
||||
ts_symbol_value,
|
||||
};
|
||||
|
||||
static const char *ts_symbol_names[] = {
|
||||
"token6",
|
||||
"number",
|
||||
"object",
|
||||
"token5",
|
||||
"token7",
|
||||
"token4",
|
||||
"repeat_helper2",
|
||||
"token1",
|
||||
"token3",
|
||||
"value",
|
||||
"string",
|
||||
"token2",
|
||||
"token3",
|
||||
"token5",
|
||||
"array",
|
||||
"repeat_helper1",
|
||||
"token7",
|
||||
"token4",
|
||||
"__END__",
|
||||
"token2",
|
||||
"repeat_helper2",
|
||||
"token1",
|
||||
"object",
|
||||
"value",
|
||||
};
|
||||
|
||||
static void ts_lex(TSParser *parser) {
|
||||
START_LEXER();
|
||||
switch (LEX_STATE()) {
|
||||
case 0:
|
||||
if ((LOOKAHEAD_CHAR() == '\0'))
|
||||
if (LOOKAHEAD_CHAR() == '\0')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR(1, EXPECT({"<EOF>"}));
|
||||
case 1:
|
||||
ACCEPT_TOKEN(ts_symbol___END__);
|
||||
case 2:
|
||||
if ((LOOKAHEAD_CHAR() == ','))
|
||||
if (LOOKAHEAD_CHAR() == ',')
|
||||
ADVANCE(3);
|
||||
ACCEPT_TOKEN(ts_aux_token3);
|
||||
case 3:
|
||||
ACCEPT_TOKEN(ts_aux_token2);
|
||||
case 4:
|
||||
if ((LOOKAHEAD_CHAR() == ']'))
|
||||
if (LOOKAHEAD_CHAR() == ']')
|
||||
ADVANCE(5);
|
||||
LEX_ERROR(1, EXPECT({"]"}));
|
||||
case 5:
|
||||
ACCEPT_TOKEN(ts_aux_token4);
|
||||
case 6:
|
||||
if ((LOOKAHEAD_CHAR() == ']'))
|
||||
if (LOOKAHEAD_CHAR() == ']')
|
||||
ADVANCE(5);
|
||||
if ((LOOKAHEAD_CHAR() == ','))
|
||||
if (LOOKAHEAD_CHAR() == ',')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR(2, EXPECT({",", "]"}));
|
||||
case 7:
|
||||
if ((LOOKAHEAD_CHAR() == '}'))
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR(1, EXPECT({"}"}));
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_aux_token7);
|
||||
case 9:
|
||||
if ((LOOKAHEAD_CHAR() == '}'))
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(8);
|
||||
if ((LOOKAHEAD_CHAR() == ','))
|
||||
if (LOOKAHEAD_CHAR() == ',')
|
||||
ADVANCE(3);
|
||||
LEX_ERROR(2, EXPECT({",", "}"}));
|
||||
case 10:
|
||||
if ((LOOKAHEAD_CHAR() == '{'))
|
||||
ADVANCE(16);
|
||||
if ((LOOKAHEAD_CHAR() == '['))
|
||||
ADVANCE(15);
|
||||
if ((LOOKAHEAD_CHAR() == '\"'))
|
||||
if (LOOKAHEAD_CHAR() == '[')
|
||||
ADVANCE(18);
|
||||
if (LOOKAHEAD_CHAR() == '{')
|
||||
ADVANCE(19);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"}));
|
||||
case 11:
|
||||
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
|
||||
if ('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9')
|
||||
ADVANCE(11);
|
||||
ACCEPT_TOKEN(ts_symbol_number);
|
||||
case 12:
|
||||
if (!((LOOKAHEAD_CHAR() == '\"')))
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(14);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(13);
|
||||
LEX_ERROR(2, EXPECT({"<EOF>-!", "#-<MAX>"}));
|
||||
case 13:
|
||||
if ((LOOKAHEAD_CHAR() == '\"'))
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(17);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(14);
|
||||
if (!((LOOKAHEAD_CHAR() == '\"')))
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(13);
|
||||
LEX_ERROR(1, EXPECT({"<ANY>"}));
|
||||
case 14:
|
||||
ACCEPT_TOKEN(ts_symbol_string);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(16);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(14);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(13);
|
||||
if ('#' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\"')
|
||||
ADVANCE(13);
|
||||
LEX_ERROR(2, EXPECT({"<ANY>", "#-\""}));
|
||||
case 15:
|
||||
ACCEPT_TOKEN(ts_aux_token1);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(13);
|
||||
LEX_ERROR(1, EXPECT({"\""}));
|
||||
case 16:
|
||||
ACCEPT_TOKEN(ts_aux_token5);
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(17);
|
||||
if (LOOKAHEAD_CHAR() == '\\')
|
||||
ADVANCE(14);
|
||||
if (']' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '\\')
|
||||
ADVANCE(15);
|
||||
if (!((LOOKAHEAD_CHAR() == '\"') ||
|
||||
(LOOKAHEAD_CHAR() == '\\')))
|
||||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_symbol_string);
|
||||
case 17:
|
||||
if ((LOOKAHEAD_CHAR() == ':'))
|
||||
ADVANCE(18);
|
||||
LEX_ERROR(1, EXPECT({":"}));
|
||||
ACCEPT_TOKEN(ts_symbol_string);
|
||||
case 18:
|
||||
ACCEPT_TOKEN(ts_aux_token6);
|
||||
ACCEPT_TOKEN(ts_aux_token1);
|
||||
case 19:
|
||||
if ((LOOKAHEAD_CHAR() == '\"'))
|
||||
ACCEPT_TOKEN(ts_aux_token5);
|
||||
case 20:
|
||||
if (LOOKAHEAD_CHAR() == ':')
|
||||
ADVANCE(21);
|
||||
LEX_ERROR(1, EXPECT({":"}));
|
||||
case 21:
|
||||
ACCEPT_TOKEN(ts_aux_token6);
|
||||
case 22:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
LEX_ERROR(1, EXPECT({"\""}));
|
||||
default:
|
||||
|
|
@ -132,16 +170,16 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(53);
|
||||
case ts_symbol_array:
|
||||
SHIFT(53);
|
||||
case ts_symbol_value:
|
||||
SHIFT(1);
|
||||
case ts_symbol_object:
|
||||
SHIFT(53);
|
||||
case ts_symbol_number:
|
||||
SHIFT(53);
|
||||
case ts_aux_token5:
|
||||
SHIFT(47);
|
||||
case ts_symbol_number:
|
||||
SHIFT(53);
|
||||
case ts_aux_token1:
|
||||
SHIFT(2);
|
||||
case ts_symbol_value:
|
||||
SHIFT(1);
|
||||
default:
|
||||
PARSE_PANIC();
|
||||
}
|
||||
|
|
@ -160,14 +198,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(44);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
SHIFT(44);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -180,14 +218,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(4);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
SHIFT(4);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -230,14 +268,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(43);
|
||||
case ts_symbol_array:
|
||||
SHIFT(43);
|
||||
case ts_symbol_value:
|
||||
SHIFT(41);
|
||||
case ts_symbol_object:
|
||||
SHIFT(43);
|
||||
case ts_symbol_number:
|
||||
SHIFT(43);
|
||||
case ts_symbol_value:
|
||||
SHIFT(41);
|
||||
case ts_aux_token5:
|
||||
SHIFT(35);
|
||||
case ts_symbol_number:
|
||||
SHIFT(43);
|
||||
case ts_aux_token1:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -250,14 +288,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(9);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
SHIFT(9);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -294,7 +332,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 12:
|
||||
SET_LEX_STATE(19);
|
||||
SET_LEX_STATE(22);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(13);
|
||||
|
|
@ -302,7 +340,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 13:
|
||||
SET_LEX_STATE(17);
|
||||
SET_LEX_STATE(20);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token6:
|
||||
SHIFT(14);
|
||||
|
|
@ -316,14 +354,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(15);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(15);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -360,7 +398,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 18:
|
||||
SET_LEX_STATE(19);
|
||||
SET_LEX_STATE(22);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(19);
|
||||
|
|
@ -368,7 +406,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 19:
|
||||
SET_LEX_STATE(17);
|
||||
SET_LEX_STATE(20);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token6:
|
||||
SHIFT(20);
|
||||
|
|
@ -382,14 +420,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(34);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_value:
|
||||
SHIFT(32);
|
||||
case ts_symbol_object:
|
||||
SHIFT(34);
|
||||
case ts_symbol_number:
|
||||
SHIFT(34);
|
||||
case ts_symbol_value:
|
||||
SHIFT(32);
|
||||
case ts_aux_token5:
|
||||
SHIFT(26);
|
||||
case ts_symbol_number:
|
||||
SHIFT(34);
|
||||
case ts_aux_token1:
|
||||
SHIFT(21);
|
||||
default:
|
||||
|
|
@ -402,14 +440,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(22);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
SHIFT(22);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -456,7 +494,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 26:
|
||||
SET_LEX_STATE(19);
|
||||
SET_LEX_STATE(22);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(27);
|
||||
|
|
@ -464,7 +502,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 27:
|
||||
SET_LEX_STATE(17);
|
||||
SET_LEX_STATE(20);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token6:
|
||||
SHIFT(28);
|
||||
|
|
@ -478,14 +516,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(29);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(29);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -524,10 +562,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 32:
|
||||
SET_LEX_STATE(9);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token2:
|
||||
SHIFT(18);
|
||||
case ts_aux_token7:
|
||||
REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0}));
|
||||
case ts_aux_token2:
|
||||
SHIFT(18);
|
||||
case ts_aux_repeat_helper1:
|
||||
SHIFT(33);
|
||||
default:
|
||||
|
|
@ -552,7 +590,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 35:
|
||||
SET_LEX_STATE(19);
|
||||
SET_LEX_STATE(22);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(36);
|
||||
|
|
@ -560,7 +598,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 36:
|
||||
SET_LEX_STATE(17);
|
||||
SET_LEX_STATE(20);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token6:
|
||||
SHIFT(37);
|
||||
|
|
@ -574,14 +612,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(38);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(38);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -620,10 +658,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 41:
|
||||
SET_LEX_STATE(6);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token4:
|
||||
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
|
||||
case ts_aux_token2:
|
||||
SHIFT(7);
|
||||
case ts_aux_token4:
|
||||
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
|
||||
case ts_aux_repeat_helper2:
|
||||
SHIFT(42);
|
||||
default:
|
||||
|
|
@ -676,7 +714,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 47:
|
||||
SET_LEX_STATE(19);
|
||||
SET_LEX_STATE(22);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(48);
|
||||
|
|
@ -684,7 +722,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_PANIC();
|
||||
}
|
||||
case 48:
|
||||
SET_LEX_STATE(17);
|
||||
SET_LEX_STATE(20);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token6:
|
||||
SHIFT(49);
|
||||
|
|
@ -698,14 +736,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(50);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(50);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ describe("json", []() {
|
|||
});
|
||||
|
||||
it("parses strings", [&]() {
|
||||
TSDocumentSetText(document, "\"this is a string\"");
|
||||
TSDocumentSetText(document, "\"this is a \\\"string\\\" \"");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (string))"));
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
#include "item_set_transitions.h"
|
||||
#include "item_set_closure.h"
|
||||
#include "rule_transitions.h"
|
||||
#include "merge_transitions.h"
|
||||
|
||||
using std::dynamic_pointer_cast;
|
||||
using std::make_shared;
|
||||
using std::shared_ptr;
|
||||
|
||||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
|
||||
transition_map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<CharacterSet, LexItemSet> result;
|
||||
for (LexItem item : item_set) {
|
||||
for (const LexItem &item : item_set) {
|
||||
transition_map<CharacterSet, LexItemSet> item_transitions;
|
||||
for (auto transition : char_transitions(item.rule)) {
|
||||
auto rule = transition.first;
|
||||
|
|
@ -28,7 +28,7 @@ namespace tree_sitter {
|
|||
item_transitions.add(rule, make_shared<LexItemSet>(new_item_set));
|
||||
}
|
||||
|
||||
result.merge(item_transitions, [](shared_ptr<const LexItemSet> left, shared_ptr<const LexItemSet> right) -> shared_ptr<const LexItemSet> {
|
||||
result = merge_char_transitions<LexItemSet>(result, item_transitions, [](shared_ptr<LexItemSet> left, shared_ptr<LexItemSet> right) {
|
||||
return make_shared<LexItemSet>(merge_sets(*left, *right));
|
||||
});
|
||||
}
|
||||
|
|
@ -38,7 +38,7 @@ namespace tree_sitter {
|
|||
|
||||
transition_map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
transition_map<rules::Symbol, ParseItemSet> result;
|
||||
for (ParseItem item : item_set) {
|
||||
for (const ParseItem &item : item_set) {
|
||||
transition_map<rules::Symbol, ParseItemSet> item_transitions;
|
||||
for (auto transition : sym_transitions(item.rule)) {
|
||||
auto rule = transition.first;
|
||||
|
|
@ -49,7 +49,7 @@ namespace tree_sitter {
|
|||
item_transitions.add(rule, make_shared<ParseItemSet>(new_item_set));
|
||||
}
|
||||
|
||||
result.merge(item_transitions, [](shared_ptr<const ParseItemSet> left, shared_ptr<const ParseItemSet> right) -> shared_ptr<const ParseItemSet> {
|
||||
result = merge_sym_transitions<ParseItemSet>(result, item_transitions, [](shared_ptr<ParseItemSet> left, shared_ptr<ParseItemSet> right) {
|
||||
return make_shared<ParseItemSet>(merge_sets(*left, *right));
|
||||
});
|
||||
}
|
||||
|
|
|
|||
56
src/compiler/build_tables/merge_transitions.h
Normal file
56
src/compiler/build_tables/merge_transitions.h
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
#ifndef __tree_sitter__merge_transitions__
|
||||
#define __tree_sitter__merge_transitions__
|
||||
|
||||
#include <functional>
#include <memory>
#include <utility>
#include <vector>

#include "transition_map.h"
#include "character_set.h"
#include "symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
template<typename T>
|
||||
transition_map<rules::Symbol, T>
|
||||
merge_sym_transitions(const transition_map<rules::Symbol, T> &left,
|
||||
const transition_map<rules::Symbol, T> &right,
|
||||
std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
|
||||
transition_map<rules::Symbol, T> result(left);
|
||||
for (auto &pair : right) {
|
||||
auto rule = pair.first;
|
||||
bool merged = false;
|
||||
for (auto &existing_pair : result) {
|
||||
auto existing_rule = existing_pair.first;
|
||||
if (existing_rule->operator==(*rule)) {
|
||||
existing_pair.second = merge_fn(existing_pair.second, pair.second);
|
||||
merged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!merged)
|
||||
result.add(pair.first, pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
transition_map<rules::CharacterSet, T>
|
||||
merge_char_transitions(const transition_map<rules::CharacterSet, T> &left,
|
||||
const transition_map<rules::CharacterSet, T> &right,
|
||||
std::function<std::shared_ptr<T>(std::shared_ptr<T>, std::shared_ptr<T>)> merge_fn) {
|
||||
transition_map<rules::CharacterSet, T> result(left);
|
||||
for (auto &pair : right) {
|
||||
auto rule = pair.first;
|
||||
for (auto &existing_pair : left) {
|
||||
auto existing_rule = existing_pair.first;
|
||||
auto intersection = existing_rule->remove_set(*rule);
|
||||
if (!intersection.is_empty()) {
|
||||
rule->remove_set(intersection);
|
||||
result.add(std::make_shared<rules::CharacterSet>(intersection), merge_fn(existing_pair.second, pair.second));
|
||||
}
|
||||
}
|
||||
result.add(rule, pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
#include "rule_transitions.h"
|
||||
#include "rules.h"
|
||||
#include "merge_transitions.h"
|
||||
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
|
|
@ -9,6 +10,23 @@ namespace tree_sitter {
|
|||
return typeid(*rule) == typeid(Blank);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
transition_map<T, Rule> merge_transitions(const transition_map<T, Rule> &left, const transition_map<T, Rule> &right);
|
||||
|
||||
template<>
|
||||
transition_map<CharacterSet, Rule> merge_transitions(const transition_map<CharacterSet, Rule> &left, const transition_map<CharacterSet, Rule> &right) {
|
||||
return merge_char_transitions<Rule>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
transition_map<Symbol, Rule> merge_transitions(const transition_map<Symbol, Rule> &left, const transition_map<Symbol, Rule> &right) {
|
||||
return merge_sym_transitions<Rule>(left, right, [](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
class TransitionsVisitor : public rules::Visitor {
|
||||
public:
|
||||
|
|
@ -23,7 +41,7 @@ namespace tree_sitter {
|
|||
void visit_atom(const Rule *rule) {
|
||||
auto atom = dynamic_cast<const T *>(rule);
|
||||
if (atom) {
|
||||
value = transition_map<T, Rule>({{ std::make_shared<const T>(*atom), blank() }});
|
||||
value = transition_map<T, Rule>({{ std::make_shared<T>(*atom), blank() }});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -37,9 +55,7 @@ namespace tree_sitter {
|
|||
|
||||
// Handles a choice rule: the transitions of both alternatives are computed
// and combined through merge_transitions<T>.
void visit(const Choice *rule) {
    // A previous assignment plus value.merge(...) used to run first, but its
    // result was overwritten by this statement, so the transitions of both
    // sides were computed twice for nothing. Only the final assignment remains.
    value = merge_transitions<T>(transitions(rule->left), transitions(rule->right));
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
|
|
@ -50,9 +66,7 @@ namespace tree_sitter {
|
|||
return seq({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
value.merge(transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
value = merge_transitions<T>(value, transitions(rule->right));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,13 +8,14 @@
|
|||
namespace tree_sitter {
|
||||
template<typename TKey, typename TValue>
|
||||
class transition_map {
|
||||
typedef std::shared_ptr<const TKey> TKeyPtr;
|
||||
typedef std::shared_ptr<const TValue> TValuePtr;
|
||||
typedef std::pair<const TKeyPtr, TValuePtr> pair_type;
|
||||
typedef std::shared_ptr<TKey> TKeyPtr;
|
||||
typedef std::shared_ptr<TValue> TValuePtr;
|
||||
typedef std::pair<TKeyPtr, TValuePtr> pair_type;
|
||||
typedef std::vector<pair_type> contents_type;
|
||||
|
||||
contents_type contents;
|
||||
|
||||
public:
|
||||
|
||||
transition_map() : contents(contents_type()) {};
|
||||
transition_map(std::vector<pair_type> pairs) : contents(pairs) {};
|
||||
|
||||
|
|
@ -33,15 +34,6 @@ namespace tree_sitter {
|
|||
contents.push_back(pair_type(key, value));
|
||||
}
|
||||
|
||||
void merge(const transition_map<TKey, TValue> &other, std::function<TValuePtr(TValuePtr, TValuePtr)> merge_fn) {
|
||||
for (pair_type other_pair : other) {
|
||||
if (pair_type *current_pair = pair_for_key(*other_pair.first))
|
||||
current_pair->second = merge_fn(current_pair->second, other_pair.second);
|
||||
else
|
||||
add(other_pair.first, other_pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
TValuePtr operator[](const TKey &key) const {
|
||||
for (auto pair : *this) {
|
||||
if (*pair.first == key) {
|
||||
|
|
@ -52,7 +44,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
template<typename NewV>
|
||||
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<const NewV>(TValuePtr)> map_fn) {
|
||||
transition_map<TKey, NewV> map(std::function<const std::shared_ptr<NewV>(TValuePtr)> map_fn) {
|
||||
transition_map<TKey, NewV> result;
|
||||
for (pair_type pair : *this) {
|
||||
auto new_value = map_fn(pair.second);
|
||||
|
|
@ -70,18 +62,6 @@ namespace tree_sitter {
|
|||
const_iterator begin() const { return contents.begin(); }
|
||||
const_iterator end() const { return contents.end(); }
|
||||
size_t size() const { return contents.size(); }
|
||||
|
||||
private:
|
||||
|
||||
// Returns a pointer to the first stored pair whose key compares equal to
// `key`, or NULL when no such pair exists. The returned pointer refers to an
// element of `contents`.
pair_type * pair_for_key(const TKey &key) {
    // Iterating the container directly avoids comparing a signed int index
    // against the unsigned result of contents.size().
    for (pair_type &pair : contents) {
        if (*pair.first == key) return &pair;
    }
    return NULL;
}
|
||||
|
||||
contents_type contents;
|
||||
};
|
||||
|
||||
template<typename K, typename V>
|
||||
|
|
|
|||
|
|
@ -98,6 +98,8 @@ namespace tree_sitter {
|
|||
return "\\0";
|
||||
case '"':
|
||||
return "\\\"";
|
||||
case '\\':
|
||||
return "\\\\";
|
||||
default:
|
||||
return string() + character;
|
||||
}
|
||||
|
|
@ -108,16 +110,20 @@ namespace tree_sitter {
|
|||
if (range.min == range.max) {
|
||||
return lookahead + " == '" + character_code(range.min) + "'";
|
||||
} else {
|
||||
return string("'") + range.min + string("' <= ") + lookahead +
|
||||
" && " + lookahead + " <= '" + range.max + "'";
|
||||
return string("'") + character_code(range.min) + string("' <= ") + lookahead +
|
||||
" && " + lookahead + " <= '" + character_code(range.max) + "'";
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the condition text for a character set.
// A set with exactly one range yields that range's condition unchanged.
// Otherwise each range's condition is wrapped in parentheses and the pieces
// are joined with " ||\n ".
string condition_for_character_set(const rules::CharacterSet &set) {
    // The single-range case must be checked first: an unconditional loop and
    // return used to sit ahead of it, which made this branch unreachable.
    if (set.ranges.size() == 1) {
        return condition_for_character_range(*set.ranges.begin());
    }

    vector<string> parts;
    for (auto &match : set.ranges) {
        parts.push_back("(" + condition_for_character_range(match) + ")");
    }
    return join(parts, " ||\n ");
}
|
||||
|
||||
string condition_for_character_rule(const rules::CharacterSet &rule) {
|
||||
|
|
|
|||
|
|
@ -154,6 +154,10 @@ namespace tree_sitter {
|
|||
return removed_set;
|
||||
}
|
||||
|
||||
// Reports whether this set holds no character ranges at all.
bool CharacterSet::is_empty() const {
|
||||
return ranges.empty();
|
||||
}
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
for (auto &other_range : other.ranges) {
|
||||
add_range(this, other_range);
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ namespace tree_sitter {
|
|||
CharacterSet complement() const;
|
||||
CharacterSet intersect(const CharacterSet &) const;
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
bool is_empty() const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
|
|
@ -51,7 +52,7 @@ namespace tree_sitter {
|
|||
std::set<CharacterRange> ranges;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const CharacterSet> char_ptr;
|
||||
typedef std::shared_ptr<CharacterSet> char_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ namespace tree_sitter {
|
|||
class Visitor;
|
||||
class Rule;
|
||||
|
||||
typedef std::shared_ptr<const Rule> rule_ptr;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ namespace tree_sitter {
|
|||
bool is_auxiliary;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Symbol> sym_ptr;
|
||||
typedef std::shared_ptr<Symbol> sym_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@
|
|||
125120A218307FFD00C9B56A /* test_grammars.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = test_grammars.h; path = spec/fixtures/grammars/test_grammars.h; sourceTree = SOURCE_ROOT; };
|
||||
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12661BF318A1505A00A259FB /* character_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = character_set_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
127528AF18A6F9C6006B682B /* merge_transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = merge_transitions.h; sourceTree = "<group>"; };
|
||||
12AB465D188BD03E00DE79DF /* follow_sets.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = follow_sets.cpp; sourceTree = "<group>"; };
|
||||
12AB465E188BD03E00DE79DF /* follow_sets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = follow_sets.h; sourceTree = "<group>"; };
|
||||
12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = item_set_closure_spec.cpp; sourceTree = "<group>"; };
|
||||
|
|
@ -228,6 +229,7 @@
|
|||
12EDCFBF18820880005A7A07 /* item_set_closure.h */,
|
||||
12EDCFC118820A70005A7A07 /* item_set_transitions.cpp */,
|
||||
12EDCFC218820A70005A7A07 /* item_set_transitions.h */,
|
||||
127528AF18A6F9C6006B682B /* merge_transitions.h */,
|
||||
12EDCFA418820137005A7A07 /* perform.cpp */,
|
||||
12EDCFA518820137005A7A07 /* perform.h */,
|
||||
12EDCFA618820137005A7A07 /* rule_transitions.cpp */,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue